Set-Up workspace

There are two possible directories containing sequence data:

  • /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL ** this directory is for running the code on UVA’s HPC Rivanna

  • /Users/kelseyschoenemann/Desktop/Bioinformatics/2024-09-27_MiSeq_v3/rbcL ** this directory is for running the code on my local machine

(you can easily switch between these two directories by selecting the old path, up to & including Bioinformatics, and hit Cmd F to bring up Find & Replace tool, then copy-paste the new path into the Replace box and hit All. There should be XX replacements)

Summary of Files / Directories

/scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL *Ns & primers present (raw files)

/scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/filtN *Ns removed, primers present (pre-filtered)

/scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt *Ns & primers removed (cutadapted)

/scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered *Ns & primers removed and filter & trimmed (filtered)

  • List of primers used * We used plant universal primers: iTru_ITS2_S2F (ATGCGATACTTGGTGTGAAT) & iTru_ITS2_4R (TCCTCCGCTTATTGATATGC) iTru_rbcL2 (TGGCAGCATTYCGAGTAACTC) & iTru_rbcLa-R (GTAAAATCAAGTCCACCRCG)

Load packages

#install packages with BiocManager (if you have anaconda)
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# BiocManager::install("dada2", version = "3.16")
# BiocManager::install(c("DECIPHER", "ShortRead", "phyloseq"))
# BiocManager::install("decontam")

library(devtools); packageVersion("devtools")
## Loading required package: usethis
## [1] '2.4.5'
library(dada2); packageVersion("dada2")
## Loading required package: Rcpp
## [1] '1.32.0'
library(ShortRead); packageVersion("ShortRead")
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
##     tapply, union, unique, unsplit, which.max, which.min
## Loading required package: BiocParallel
## Loading required package: Biostrings
## Loading required package: S4Vectors
## Loading required package: stats4
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: XVector
## Loading required package: GenomeInfoDb
## 
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
## 
##     strsplit
## Loading required package: Rsamtools
## Loading required package: GenomicRanges
## Loading required package: GenomicAlignments
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
## [1] '1.62.0'
library(Biostrings); packageVersion("Biostrings")
## [1] '2.72.1'
library(DECIPHER); packageVersion("DECIPHER")
## [1] '3.0.0'
library(phyloseq); packageVersion("phyloseq") 
## 
## Attaching package: 'phyloseq'
## The following object is masked from 'package:SummarizedExperiment':
## 
##     distance
## The following object is masked from 'package:Biobase':
## 
##     sampleNames
## The following object is masked from 'package:GenomicRanges':
## 
##     distance
## The following object is masked from 'package:IRanges':
## 
##     distance
## [1] '1.48.0'
library(ggplot2); packageVersion("ggplot2")
## [1] '3.5.1'
#library(decontam); packageVersion("decontam")

#devtools::install_github("benjjneb/dada2", ref="v1.16") # change the ref argument to get other versions

Set working directory

# Root directory holding the raw fastq files; later paths are built from it.
path <- "/scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL" ## CHANGE ME to the directory containing the fastq files.
# Keep the working directory on the same folder so that any relative file
# names resolve there. Reusing `path` avoids maintaining the literal twice.
setwd(path)
# `pattern` is a regular expression, not a shell glob. "\\.fastq$" matches
# names that end in ".fastq" (so leftover ".fastq.gz" files are not listed).
head(list.files(path, pattern = "\\.fastq$"))
## [1] "rbcL-2020-6-16-H1_S293_L001_R1_001.fastq"
## [2] "rbcL-2020-6-16-H1_S293_L001_R2_001.fastq"
## [3] "rbcL-2020-6-16-H5_S294_L001_R1_001.fastq"
## [4] "rbcL-2020-6-16-H5_S294_L001_R2_001.fastq"
## [5] "rbcL-2020-6-16-H6_S295_L001_R1_001.fastq"
## [6] "rbcL-2020-6-16-H6_S295_L001_R2_001.fastq"
list.files(path, pattern = "*.fastq")[1]
## [1] "rbcL-2020-6-16-H1_S293_L001_R1_001.fastq"
#R.utils::gunzip(list.files(path), remove=F)
# Check that the first fastq file is really uncompressed (FALSE = not gzipped).
# method = "content" inspects the file itself instead of only looking for a
# ".gz" extension, which a name selected by a ".fastq" pattern can never have.
# full.names = TRUE makes the check independent of the working directory.
R.utils::isGzipped(list.files(path, pattern = "\\.fastq$", full.names = TRUE)[1], method = "content")
## [1] FALSE
# install R.utils
# library(R.utils)
# lapply(list.files(path, pattern = "*.gz"), FUN=gunzip, remove=F) # unzip all .gz files and don't remove compressed files
# I manually moved all compressed files into a new folder, leaving these unzipped files in the working directory for this script
# commenting out since I only need to unzip once

Prep raw sequence reads

Match forward and reverse reads by sample name. Pre-filter to remove reads with Ns.

Generate matched lists of fwd & rev reads with sample name

Forward and reverse fastq files have the format: rbcL_SAMPLENAME_SXXX_L001_R1_001.fastq and rbcL_SAMPLENAME_SXXX_L001_R2_001.fastq, respectively

For example: rbcL-2020-6-16-H1_S293_L001_R1_001.fastq is the forward reads of rbcL sample 2020-06-16-H1

# Raw read files as sorted full paths: forward reads carry the R1 tag,
# reverse reads the R2 tag.
read_patterns <- c(fwd = "L001_R1_001.fastq", rev = "L001_R2_001.fastq")
fnFs <- sort(list.files(path, pattern = read_patterns[["fwd"]], full.names = TRUE))
fnRs <- sort(list.files(path, pattern = read_patterns[["rev"]], full.names = TRUE))

#string parsing may have to be altered in your own data if your file names have a different format.

Pre-filter to remove reads with Ns

Ambiguous bases (Ns) in the sequencing reads make accurate mapping of short primer sequences difficult. Here, we remove reads with Ns, but perform no other filtering.

# Destination paths for the N-filtered files: same file names as the raw
# reads, placed in the filtN/ subdirectory within path.
filtN_dir <- file.path(path, "filtN")
fnFs.filtN <- file.path(filtN_dir, basename(fnFs))
fnRs.filtN <- file.path(filtN_dir, basename(fnRs))

Now we can run the pre-filter. At this stage filterAndTrim only removes reads that contain Ns (maxN = 0); no quality-based filtering or trimming is applied yet. This function takes files from path /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL and creates new files in filtN folder /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/filtN

# Pre-filter only: drop read pairs containing any N (maxN = 0).
# matchIDs and compress are spelled with TRUE/FALSE because T and F are
# ordinary variables that can be reassigned.
# compress = FALSE keeps the filtN output as plain (non-gzipped) fastq.
filterAndTrim(fnFs, fnFs.filtN, fnRs, fnRs.filtN, maxN = 0, multithread = TRUE, matchIDs = TRUE, compress = FALSE) #eliminates sequences with more than 0 Ns;
## Some input samples had no reads pass the filter.
#I had an issue with "Mismatched forward and reverse sequence files" but adding the matchIDs = TRUE parameter fixed it; #I had an issue with the filtN files being compressed so the cutadapt command couldn't read the files ("UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte"), adding the compress = FALSE parameter fixed it (("Or you could just gzip all your files at the beginning." - benjjneb)??)

Check how much filtering affected read numbers

# Quality profiles of the first sample, before and after the N pre-filter.
first_sample <- 1

# before filterAndTrim: raw forward, then raw reverse reads
plotQualityProfile(fnFs[first_sample])

plotQualityProfile(fnRs[first_sample])

# after filterAndTrim: N-filtered forward, then N-filtered reverse reads
plotQualityProfile(fnFs.filtN[first_sample])

plotQualityProfile(fnRs.filtN[first_sample])

Not every sample made it through the pre-filter to remove reads with Ns

length(file.path(path, "filtN", basename(fnFs))) #length of "fnFs.filtN," created in chunk above (261)
## [1] 261
length(list.files(file.path(path, "filtN"), pattern = "L001_R1_001.fastq", full.names = TRUE)) #length of files actually written to the fnFs.filtN directories (258)
## [1] 258

Updating path names (after samples drop out)

# Not every sample made it through the filter, so rebuild the filtN path
# vectors from the files that are actually present in filtN/.
filtN_dir <- file.path(path, "filtN")
fnFs.filtN <- file.path(filtN_dir, list.files(filtN_dir, pattern = "L001_R1_001.fastq"))
fnRs.filtN <- file.path(filtN_dir, list.files(filtN_dir, pattern = "L001_R2_001.fastq"))

Identify primers in reads

Verify the presence and orientation of these primers

#rbcL primers
FWD <- "TGGCAGCATTYCGAGTAACTC"  ## CHANGE ME to your forward primer sequence
REV <- "GTAAAATCAAGTCCACCRCG"  ## CHANGE ME...

#to ensure we have the right primers, and the correct orientation of the primers on the reads, we will verify the presence and orientation of these primers in the data

allOrients <- function(primer) {
  # Create all orientations of the input sequence.
  #
  # primer: a single primer sequence as a character string (IUPAC codes allowed).
  # Returns a named character vector with elements Forward, Complement,
  # Reverse and RevComp.
  #
  # Fail loudly if Biostrings is missing; require() would only return FALSE
  # and let the function break later with a less helpful error.
  if (!requireNamespace("Biostrings", quietly = TRUE)) {
    stop("Package 'Biostrings' is required by allOrients()", call. = FALSE)
  }
  dna <- Biostrings::DNAString(primer)  # Biostrings works w/ DNAString objects rather than character vectors
  # list() keeps the four objects as separate named elements, and the
  # Biostrings:: prefix protects against same-named functions from other
  # attached packages.
  orients <- list(
    Forward = dna,
    Complement = Biostrings::complement(dna),
    Reverse = Biostrings::reverse(dna),
    RevComp = Biostrings::reverseComplement(dna)
  )
  # Convert back to a character vector; vapply guarantees one string each.
  vapply(orients, function(x) as.character(x), character(1))
}

FWD.orients <- allOrients(FWD)
REV.orients <- allOrients(REV)

FWD.orients #all possible orientations of forward
##                 Forward              Complement                 Reverse 
## "TGGCAGCATTYCGAGTAACTC" "ACCGTCGTAARGCTCATTGAG" "CTCAATGAGCYTTACGACGGT" 
##                 RevComp 
## "GAGTTACTCGRAATGCTGCCA"
REV.orients #...and reverse primers
##                Forward             Complement                Reverse 
## "GTAAAATCAAGTCCACCRCG" "CATTTTAGTTCAGGTGGYGC" "GCRCCACCTGAACTAAAATG" 
##                RevComp 
## "CGYGGTGGACTTGATTTTAC"

Count how many times primers appear in reads

We are now ready to count the number of times the primers appear in the forward and reverse read, while considering all possible primer orientations. Identifying and counting the primers on one set of paired end FASTQ files is sufficient, assuming all the files were created using the same library preparation, so we’ll just process the first sample.

primerHits <- function(primer, fn) {
  # Number of reads in fastq file `fn` that contain `primer` at least once.
  # fixed = FALSE is passed through to vcountPattern unchanged.
  reads <- sread(readFastq(fn))
  hits_per_read <- vcountPattern(primer, reads, fixed = FALSE)
  sum(hits_per_read > 0)
}

# Primer hits in the first N-filtered sample: one row per primer/read-direction
# combination, one column per primer orientation.
count_hits <- function(orients, fastq) sapply(orients, primerHits, fn = fastq)
first_fwd <- fnFs.filtN[[1]]
first_rev <- fnRs.filtN[[1]]
rbind(FWD.ForwardReads = count_hits(FWD.orients, first_fwd),
      FWD.ReverseReads = count_hits(FWD.orients, first_rev),
      REV.ForwardReads = count_hits(REV.orients, first_fwd),
      REV.ReverseReads = count_hits(REV.orients, first_rev))
##                  Forward Complement Reverse RevComp
## FWD.ForwardReads   15698          0       0       0
## FWD.ReverseReads       0          0       0       0
## REV.ForwardReads       0          0       0       0
## REV.ReverseReads   13318          0       0       0

Note: Orientation mixups are a common trip-up. If, for example, the REV primer is matching the Reverse reads in its RevComp orientation, then replace REV with its reverse-complement orientation (REV <- REV.orients[["RevComp"]]) before proceeding.

Remove primers from reads

These primers can be now removed using a specialized primer/adapter removal tool. Here, we use cutadapt for this purpose. Download, installation and usage instructions are available online: http://cutadapt.readthedocs.io/en/stable/index.html

#cutadapt <- "/Users/kelseyschoenemann/opt/anaconda3/envs/cutadaptenv/bin/cutadapt" #CHANGE ME to the cutadapt path on your local machine

cutadapt <- "/home/kls7sg/.local/bin/cutadapt" #for running on Rivanna HPC
system2(cutadapt, args = "--version") # Run shell commands from R

If the above command successfully executed, R has found cutadapt and you are ready to continue following along.

We now create output filenames for the cutadapt-ed files, and define the parameters we are going to give the cutadapt command. The critical parameters are the primers, and they need to be in the right orientation, i.e. the FWD primer should have been matching the forward-reads in its forward orientation, and the REV primer should have been matching the reverse-reads in its forward orientation.

Create directory with updated path names

# Folder in the main directory that receives the cutadapt output; it is
# created only if it does not exist yet.
path.cut <- file.path(path, "cutadapt")
if (!dir.exists(path.cut)) {
  dir.create(path.cut)
}
#/scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt

# fnFs.cut <- file.path(path.cut, basename(fnFs)) #old code #to place fwd reads w/o primers in the new cutadapt directory
# fnRs.cut <- file.path(path.cut, basename(fnRs)) #old code
           length(file.path(path, basename(fnFs)))    # 261 samples with F reads in original directory
## [1] 261
length(list.files(file.path(path, "filtN"), pattern = "L001_R1_001.fastq", full.names = TRUE)) # but only 258 samples passed the filtN filter (removing reads with Ns)
## [1] 258
# figuring out how to create/call directory paths with just retained samples
# basename(list.files(file.path(path, "filtN"), pattern = "L001_R1_001.fastq", full.names = TRUE))[1]
# file.path(path.cut, basename(list.files(file.path(path, "filtN"), pattern = "L001_R1_001.fastq", full.names = TRUE))[1])

# Output paths for the primer-trimmed reads, limited to samples that still exist.
# They are derived directly from the cutadapt input vectors so that element i
# of fnFs.cut / fnRs.cut always names the output for element i of
# fnFs.filtN / fnRs.filtN. Listing filtN/ a second time with a different
# pattern and a separate sort() would rely on two independent orderings
# happening to agree.
# NOTE: fnFs.filtN / fnRs.filtN must already be the re-listed vectors of files
# present in filtN/; the check below fails early if they are not.
stopifnot(
  length(fnFs.filtN) == length(fnRs.filtN),
  all(file.exists(fnFs.filtN)),
  all(file.exists(fnRs.filtN))
)
fnFs.cut <- file.path(path.cut, basename(fnFs.filtN)) #to place forward reads with primers cut (removed) in the new cutadapt directory
fnRs.cut <- file.path(path.cut, basename(fnRs.filtN)) #to place reverse reads with primers cut (removed) in the new cutadapt directory

Running cutadapt

# Reverse complements of both primers
FWD.RC <- dada2:::rc(FWD)
REV.RC <- dada2:::rc(REV)

# Forward reads (R1): remove FWD (-g) and the reverse complement of REV (-a)
R1.flags <- sprintf("-g %s -a %s", FWD, REV.RC)
# Reverse reads (R2): remove REV (-G) and the reverse complement of FWD (-A)
R2.flags <- sprintf("-G %s -A %s", REV, FWD.RC)

# Run Cutadapt to cut flagged sequences from input reads and save cut sequences to output folder
#Warning: A lot of output will be written to the console by cutadapt!

# Trim primers from every sample that survived the N pre-filter.
# Iterate over the filtN inputs, not the raw fnFs list: samples that lost all
# reads in the pre-filter have no filtN file, so indexing by fnFs runs past the
# end of the input/output vectors and hands cutadapt NA file names.
stopifnot(
  length(fnFs.filtN) == length(fnRs.filtN),
  length(fnFs.filtN) == length(fnFs.cut),
  length(fnFs.filtN) == length(fnRs.cut)
)
for (i in seq_along(fnFs.filtN)) {
  # File paths are shell-quoted so directories containing spaces still work.
  status <- system2(cutadapt, args = c(
    R1.flags, R2.flags, "-n", 2, #-n 2 required to remove FWD & REV from reads
    "-o", shQuote(fnFs.cut[i]), "-p", shQuote(fnRs.cut[i]), # output files
    shQuote(fnFs.filtN[i]), shQuote(fnRs.filtN[i]) # input files
  ))
  # system2 returns the exit status; report failures instead of ignoring them.
  if (!identical(as.integer(status), 0L)) {
    warning("cutadapt exited with status ", status, " for ", basename(fnFs.filtN[i]), call. = FALSE)
  }
}

Count primers again

As a sanity check, we will count the presence of primers in the first cutadapt-ed sample:

# Primer hits in the first cutadapt-ed sample: one row per primer/read-direction
# combination, one column per primer orientation.
count_hits <- function(orients, fastq) sapply(orients, primerHits, fn = fastq)
first_fwd_cut <- fnFs.cut[[1]]
first_rev_cut <- fnRs.cut[[1]]
rbind(FWD.ForwardReads = count_hits(FWD.orients, first_fwd_cut),
      FWD.ReverseReads = count_hits(FWD.orients, first_rev_cut),
      REV.ForwardReads = count_hits(REV.orients, first_fwd_cut),
      REV.ReverseReads = count_hits(REV.orients, first_rev_cut))
##                  Forward Complement Reverse RevComp
## FWD.ForwardReads       0          0       0       0
## FWD.ReverseReads       0          0       0       0
## REV.ForwardReads       0          0       0       0
## REV.ReverseReads       0          0       0       0

Success! Primers are no longer detected in the cutadapted reads

The primer-free sequence files are now ready to be analyzed through the DADA2 pipeline.

Prep the pre-filtered & “cutadapted” sequence reads

Generate matched lists of fwd & rev reads with sample name

Create path names (all samples included)

#the only thing changing from last time is 'path' becomes 'path.cut'
#fnRs <- sort(list.files(path, pattern = "_2.fastq.gz", full.names = TRUE))

# Primer-trimmed read files in the cutadapt folder, as sorted full paths
cut_patterns <- c(fwd = "L001_R1_001.fastq", rev = "L001_R2_001.fastq")
cutFs <- sort(list.files(path.cut, pattern = cut_patterns[["fwd"]], full.names = TRUE))
cutRs <- sort(list.files(path.cut, pattern = cut_patterns[["rev"]], full.names = TRUE))

Filter and trim “cutadapted” reads with filtering parameters

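To store the output files of filtered reads, we’re creating another directory /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered. The files are written gzip-compressed (compress = TRUE), but note that they keep the same .fastq file names as the cutadapt files rather than a .fastq.gz extension.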

# Destination paths for the quality-filtered reads: same file names as the
# cutadapt files, inside the filtered/ subfolder of path.cut.
filt_dir <- file.path(path.cut, "filtered")
filtFs <- file.path(filt_dir, basename(cutFs))
filtRs <- file.path(filt_dir, basename(cutRs))
#recall, the PRE-filter filter:    filterAndTrim(fnFs, fnFs.filtN, fnRs, fnRs.filtN, maxN = 0, multithread = TRUE, matchIDs = T, compress=F) #eliminates sequences with more than 0 Ns

#NOW we filter for more stringent Quality Control
# `out` is the value returned by filterAndTrim (reads.in / reads.out per sample).
out <- filterAndTrim(
  fwd = cutFs, filt = filtFs,
  rev = cutRs, filt.rev = filtRs,
  maxN = 0, maxEE = c(2, 2), minLen = 50,
  rm.phix = TRUE, compress = TRUE, multithread = TRUE
)
## Some input samples had no reads pass the filter.
head(out)
##                                          reads.in reads.out
## rbcL-2020-6-16-H1_S293_L001_R1_001.fastq    16617      9241
## rbcL-2020-6-16-H5_S294_L001_R1_001.fastq     9459      5929
## rbcL-2020-6-16-H6_S295_L001_R1_001.fastq     1296       850
## rbcL-2020-6-17-H2_S296_L001_R1_001.fastq     6917      4559
## rbcL-2020-6-17-H4_S297_L001_R1_001.fastq     3027      2027
## rbcL-2020-6-17-H8_S298_L001_R1_001.fastq    24212     15437
# Per-sample read counts plus a third (unnamed) column holding the percent of
# input reads that passed the filter.
out.table <- as.data.frame(cbind(out, (out[, 2] / out[, 1]) * 100))
# Mean percent of reads REMOVED by the filter (100 - mean percent retained).
# Samples with zero input reads give NaN and are dropped by na.rm = TRUE.
100 - mean((out[, 2] / out[, 1]) * 100, na.rm = TRUE) #lose ~40% of reads on average (i.e. ~60% are retained)
## [1] 40.16607

For this dataset, we will use standard filtering parameters:

  • maxN=0 #DADA2 requires sequences contain no Ns
  • truncQ=2 #trims/truncates reads at the first instance of a bp with quality score <= 2 (this is the filterAndTrim default; it is not set explicitly in the call above)
  • rm.phix=TRUE PhiX (FEE-ex) is a small virus with single-stranded DNA that you can spike in samples to use as control in sequencing #removes these reads in filtering
  • maxEE=c(2, 2) #sets the maximum number of “expected errors” allowed in a read (first value for forward reads, second for reverse reads)
  • minLen=50 #sets the minimum bp length of reads #gets rid of spurious very low-length sequences.

Check how much filtering affected read numbers

# Index of the sample to inspect
n <- 20
# before filterAndTrim: forward reads of the nth cutadapt-ed sample
plotQualityProfile(cutFs[n])

# after filterAndTrim: forward reads of the nth filtered sample
plotQualityProfile(filtFs[n])

Updating sample names (after samples drop out)

# updating sample names for "out"
length(rownames(as.data.frame(out)))
## [1] 258
                                           rownames(as.data.frame(out))
##   [1] "rbcL-2020-6-16-H1_S293_L001_R1_001.fastq"                       
##   [2] "rbcL-2020-6-16-H5_S294_L001_R1_001.fastq"                       
##   [3] "rbcL-2020-6-16-H6_S295_L001_R1_001.fastq"                       
##   [4] "rbcL-2020-6-17-H2_S296_L001_R1_001.fastq"                       
##   [5] "rbcL-2020-6-17-H4_S297_L001_R1_001.fastq"                       
##   [6] "rbcL-2020-6-17-H8_S298_L001_R1_001.fastq"                       
##   [7] "rbcL-2020-6-18-H3_S299_L001_R1_001.fastq"                       
##   [8] "rbcL-2020-6-18-H7_S300_L001_R1_001.fastq"                       
##   [9] "rbcL-2020-6-18-H9_S301_L001_R1_001.fastq"                       
##  [10] "rbcL-2020-6-3-H1_S302_L001_R1_001.fastq"                        
##  [11] "rbcL-2020-6-3-H5_S303_L001_R1_001.fastq"                        
##  [12] "rbcL-2020-6-3-H6_S304_L001_R1_001.fastq"                        
##  [13] "rbcL-2020-6-30-H1_S305_L001_R1_001.fastq"                       
##  [14] "rbcL-2020-6-30-H5_S306_L001_R1_001.fastq"                       
##  [15] "rbcL-2020-6-30-H6_S307_L001_R1_001.fastq"                       
##  [16] "rbcL-2020-6-4-H2_S308_L001_R1_001.fastq"                        
##  [17] "rbcL-2020-6-4-H4_S309_L001_R1_001.fastq"                        
##  [18] "rbcL-2020-6-4-H8_S310_L001_R1_001.fastq"                        
##  [19] "rbcL-2020-6-5-H3_S311_L001_R1_001.fastq"                        
##  [20] "rbcL-2020-6-5-H7_S312_L001_R1_001.fastq"                        
##  [21] "rbcL-2020-6-5-H9_S313_L001_R1_001.fastq"                        
##  [22] "rbcL-2020-7-1-H2_S314_L001_R1_001.fastq"                        
##  [23] "rbcL-2020-7-1-H4_S315_L001_R1_001.fastq"                        
##  [24] "rbcL-2020-7-1-H8_S316_L001_R1_001.fastq"                        
##  [25] "rbcL-2020-7-14-H1_S317_L001_R1_001.fastq"                       
##  [26] "rbcL-2020-7-14-H5_S318_L001_R1_001.fastq"                       
##  [27] "rbcL-2020-7-14-H6_S319_L001_R1_001.fastq"                       
##  [28] "rbcL-2020-7-15-H4_S321_L001_R1_001.fastq"                       
##  [29] "rbcL-2020-7-15-H8_S322_L001_R1_001.fastq"                       
##  [30] "rbcL-2020-7-16-H3_S323_L001_R1_001.fastq"                       
##  [31] "rbcL-2020-7-16-H7_S324_L001_R1_001.fastq"                       
##  [32] "rbcL-2020-7-16-H9_S325_L001_R1_001.fastq"                       
##  [33] "rbcL-2020-7-2-H3_S326_L001_R1_001.fastq"                        
##  [34] "rbcL-2020-7-2-H7_S327_L001_R1_001.fastq"                        
##  [35] "rbcL-2020-7-2-H9_S328_L001_R1_001.fastq"                        
##  [36] "rbcL-2021-6-13-H1_S329_L001_R1_001.fastq"                       
##  [37] "rbcL-2021-6-13-H3_S330_L001_R1_001.fastq"                       
##  [38] "rbcL-2021-6-14-H11_S331_L001_R1_001.fastq"                      
##  [39] "rbcL-2021-6-14-H6_S332_L001_R1_001.fastq"                       
##  [40] "rbcL-2021-6-14-H7_S333_L001_R1_001.fastq"                       
##  [41] "rbcL-2021-6-15-H8_S334_L001_R1_001.fastq"                       
##  [42] "rbcL-2021-6-21-H10_S335_L001_R1_001.fastq"                      
##  [43] "rbcL-2021-6-21-H12_S336_L001_R1_001.fastq"                      
##  [44] "rbcL-2021-6-21-H9_S337_L001_R1_001.fastq"                       
##  [45] "rbcL-2021-6-27-H21_S338_L001_R1_001.fastq"                      
##  [46] "rbcL-2021-6-27-H22_S339_L001_R1_001.fastq"                      
##  [47] "rbcL-2021-6-27-H27_S340_L001_R1_001.fastq"                      
##  [48] "rbcL-2021-6-28-H25_S341_L001_R1_001.fastq"                      
##  [49] "rbcL-2021-6-28-H26_S342_L001_R1_001.fastq"                      
##  [50] "rbcL-2021-6-28-H28_S343_L001_R1_001.fastq"                      
##  [51] "rbcL-2021-6-29-H17_S344_L001_R1_001.fastq"                      
##  [52] "rbcL-2021-6-29-H23_S345_L001_R1_001.fastq"                      
##  [53] "rbcL-2021-6-29-H24_S346_L001_R1_001.fastq"                      
##  [54] "rbcL-2021-6-4-H21_S347_L001_R1_001.fastq"                       
##  [55] "rbcL-2021-6-4-H22_S348_L001_R1_001.fastq"                       
##  [56] "rbcL-2021-6-4-H27_S349_L001_R1_001.fastq"                       
##  [57] "rbcL-2021-6-5-H18_S350_L001_R1_001.fastq"                       
##  [58] "rbcL-2021-6-5-H25_S351_L001_R1_001.fastq"                       
##  [59] "rbcL-2021-6-5-H26_S352_L001_R1_001.fastq"                       
##  [60] "rbcL-2021-6-6-H17_S353_L001_R1_001.fastq"                       
##  [61] "rbcL-2021-6-6-H24_S354_L001_R1_001.fastq"                       
##  [62] "rbcL-2021-6-7-H23_S355_L001_R1_001.fastq"                       
##  [63] "rbcL-2021-7-14-H10_S356_L001_R1_001.fastq"                      
##  [64] "rbcL-2021-7-14-H12_S357_L001_R1_001.fastq"                      
##  [65] "rbcL-2021-7-20-H27_S358_L001_R1_001.fastq"                      
##  [66] "rbcL-2021-7-21-H25_S359_L001_R1_001.fastq"                      
##  [67] "rbcL-2021-7-21-H26_S360_L001_R1_001.fastq"                      
##  [68] "rbcL-2021-7-6-H11_S362_L001_R1_001.fastq"                       
##  [69] "rbcL-2021-7-6-H6_S364_L001_R1_001.fastq"                        
##  [70] "rbcL-2021-7-7-H4_S365_L001_R1_001.fastq"                        
##  [71] "rbcL-2021-7-7-H8_S366_L001_R1_001.fastq"                        
##  [72] "rbcL-2021-7-8-H3_S367_L001_R1_001.fastq"                        
##  [73] "rbcL-2023-6-12-H3_S368_L001_R1_001.fastq"                       
##  [74] "rbcL-2023-6-12-H5_S369_L001_R1_001.fastq"                       
##  [75] "rbcL-2023-6-12-H7_S370_L001_R1_001.fastq"                       
##  [76] "rbcL-2023-6-13-H6_S371_L001_R1_001.fastq"                       
##  [77] "rbcL-2023-6-13-H8_S372_L001_R1_001.fastq"                       
##  [78] "rbcL-2023-6-13-H9_S373_L001_R1_001.fastq"                       
##  [79] "rbcL-2023-6-14-H3_S374_L001_R1_001.fastq"                       
##  [80] "rbcL-2023-6-14-H7_S375_L001_R1_001.fastq"                       
##  [81] "rbcL-2023-6-14-H9_S376_L001_R1_001.fastq"                       
##  [82] "rbcL-2023-6-16-H5_S377_L001_R1_001.fastq"                       
##  [83] "rbcL-2023-6-24-H6_S378_L001_R1_001.fastq"                       
##  [84] "rbcL-2023-6-24-H8_S379_L001_R1_001.fastq"                       
##  [85] "rbcL-2023-6-25-H2_S380_L001_R1_001.fastq"                       
##  [86] "rbcL-2023-6-25-H4_S381_L001_R1_001.fastq"                       
##  [87] "rbcL-2023-6-26-H1_S382_L001_R1_001.fastq"                       
##  [88] "rbcL-2023-6-26-H7_S383_L001_R1_001.fastq"                       
##  [89] "rbcL-2023-6-27-H3_S384_L001_R1_001.fastq"                       
##  [90] "rbcL-2023-6-27-H5_S385_L001_R1_001.fastq"                       
##  [91] "rbcL-2023-6-8-H1_S386_L001_R1_001.fastq"                        
##  [92] "rbcL-2023-6-8-H2_S387_L001_R1_001.fastq"                        
##  [93] "rbcL-2023-6-8-H4_S388_L001_R1_001.fastq"                        
##  [94] "rbcL-2023-6-9-H2_S389_L001_R1_001.fastq"                        
##  [95] "rbcL-2023-6-9-H4_S390_L001_R1_001.fastq"                        
##  [96] "rbcL-2023-7-15-H6_S391_L001_R1_001.fastq"                       
##  [97] "rbcL-2023-7-16-H4_S392_L001_R1_001.fastq"                       
##  [98] "rbcL-2023-7-17-H1_S393_L001_R1_001.fastq"                       
##  [99] "rbcL-2023-7-18-H3_S394_L001_R1_001.fastq"                       
## [100] "rbcL-2023-7-18-H7_S395_L001_R1_001.fastq"                       
## [101] "rbcL-2023-7-29-H5_S396_L001_R1_001.fastq"                       
## [102] "rbcL-2023-7-29-H7_S397_L001_R1_001.fastq"                       
## [103] "rbcL-2023-7-30-H8_S398_L001_R1_001.fastq"                       
## [104] "rbcL-2023-7-30-H9_S399_L001_R1_001.fastq"                       
## [105] "rbcL-2023-7-5-H1_S400_L001_R1_001.fastq"                        
## [106] "rbcL-2023-7-5-H2_S401_L001_R1_001.fastq"                        
## [107] "rbcL-2023-7-5-H4_S402_L001_R1_001.fastq"                        
## [108] "rbcL-2023-7-6-H6_S403_L001_R1_001.fastq"                        
## [109] "rbcL-2023-7-6-H8_S404_L001_R1_001.fastq"                        
## [110] "rbcL-2023-7-6-H9_S405_L001_R1_001.fastq"                        
## [111] "rbcL-2023-7-8-H3_S406_L001_R1_001.fastq"                        
## [112] "rbcL-2023-7-8-H5_S407_L001_R1_001.fastq"                        
## [113] "rbcL-2023-7-8-H7_S408_L001_R1_001.fastq"                        
## [114] "rbcL-2023-8-4-H2_S409_L001_R1_001.fastq"                        
## [115] "rbcL-2023-8-4-H5_S410_L001_R1_001.fastq"                        
## [116] "rbcL-2023-8-4-H6_S411_L001_R1_001.fastq"                        
## [117] "rbcL-2023-8-4-H7_S412_L001_R1_001.fastq"                        
## [118] "rbcL-2023-8-4-H8_S413_L001_R1_001.fastq"                        
## [119] "rbcL-2023-8-4-H9_S414_L001_R1_001.fastq"                        
## [120] "rbcL-Ba001_S415_L001_R1_001.fastq"                              
## [121] "rbcL-Ba002_S416_L001_R1_001.fastq"                              
## [122] "rbcL-Ba003_S417_L001_R1_001.fastq"                              
## [123] "rbcL-Bb001_S418_L001_R1_001.fastq"                              
## [124] "rbcL-Bb002_S419_L001_R1_001.fastq"                              
## [125] "rbcL-Bb003_S420_L001_R1_001.fastq"                              
## [126] "rbcL-Bb004_S421_L001_R1_001.fastq"                              
## [127] "rbcL-Bb005_S422_L001_R1_001.fastq"                              
## [128] "rbcL-Bb007_S423_L001_R1_001.fastq"                              
## [129] "rbcL-Bb008_S424_L001_R1_001.fastq"                              
## [130] "rbcL-Bb009_S425_L001_R1_001.fastq"                              
## [131] "rbcL-Bb010_S426_L001_R1_001.fastq"                              
## [132] "rbcL-Bb011_S427_L001_R1_001.fastq"                              
## [133] "rbcL-Bb012_S428_L001_R1_001.fastq"                              
## [134] "rbcL-Bb013_S429_L001_R1_001.fastq"                              
## [135] "rbcL-Bb014_S430_L001_R1_001.fastq"                              
## [136] "rbcL-Bb015_S431_L001_R1_001.fastq"                              
## [137] "rbcL-Bb016_S432_L001_R1_001.fastq"                              
## [138] "rbcL-Bb017_S433_L001_R1_001.fastq"                              
## [139] "rbcL-Bb018_S434_L001_R1_001.fastq"                              
## [140] "rbcL-Bb019_S435_L001_R1_001.fastq"                              
## [141] "rbcL-Bb020_S436_L001_R1_001.fastq"                              
## [142] "rbcL-Bb021_S437_L001_R1_001.fastq"                              
## [143] "rbcL-Bb022_S438_L001_R1_001.fastq"                              
## [144] "rbcL-Bb023_S439_L001_R1_001.fastq"                              
## [145] "rbcL-Bb024_S440_L001_R1_001.fastq"                              
## [146] "rbcL-Bb025_S441_L001_R1_001.fastq"                              
## [147] "rbcL-Bf001_S442_L001_R1_001.fastq"                              
## [148] "rbcL-Bf002_S443_L001_R1_001.fastq"                              
## [149] "rbcL-Bf003_S444_L001_R1_001.fastq"                              
## [150] "rbcL-Bf004_S445_L001_R1_001.fastq"                              
## [151] "rbcL-Bg001_S446_L001_R1_001.fastq"                              
## [152] "rbcL-Bg002_S447_L001_R1_001.fastq"                              
## [153] "rbcL-Bg003_S448_L001_R1_001.fastq"                              
## [154] "rbcL-Bg004_S449_L001_R1_001.fastq"                              
## [155] "rbcL-Bg005_S450_L001_R1_001.fastq"                              
## [156] "rbcL-Bg006_S451_L001_R1_001.fastq"                              
## [157] "rbcL-Bg007_S452_L001_R1_001.fastq"                              
## [158] "rbcL-Bg008_S453_L001_R1_001.fastq"                              
## [159] "rbcL-Bg009_S454_L001_R1_001.fastq"                              
## [160] "rbcL-Bg010_S455_L001_R1_001.fastq"                              
## [161] "rbcL-Bg011_S456_L001_R1_001.fastq"                              
## [162] "rbcL-Bg012_S457_L001_R1_001.fastq"                              
## [163] "rbcL-Bg013_S458_L001_R1_001.fastq"                              
## [164] "rbcL-Bg014_S459_L001_R1_001.fastq"                              
## [165] "rbcL-Bg015_S460_L001_R1_001.fastq"                              
## [166] "rbcL-Bg016_S461_L001_R1_001.fastq"                              
## [167] "rbcL-Bg017_S462_L001_R1_001.fastq"                              
## [168] "rbcL-Bg018_S463_L001_R1_001.fastq"                              
## [169] "rbcL-Bg019_S464_L001_R1_001.fastq"                              
## [170] "rbcL-Bi001_S465_L001_R1_001.fastq"                              
## [171] "rbcL-Bi002_S466_L001_R1_001.fastq"                              
## [172] "rbcL-Bi003_S467_L001_R1_001.fastq"                              
## [173] "rbcL-Bi004_S468_L001_R1_001.fastq"                              
## [174] "rbcL-Bi005_S469_L001_R1_001.fastq"                              
## [175] "rbcL-Bi006_S470_L001_R1_001.fastq"                              
## [176] "rbcL-Bi007_S471_L001_R1_001.fastq"                              
## [177] "rbcL-CKC0001_S472_L001_R1_001.fastq"                            
## [178] "rbcL-ESE0004_S473_L001_R1_001.fastq"                            
## [179] "rbcL-ext-neg-ctrl-20230909_S474_L001_R1_001.fastq"              
## [180] "rbcL-ext-neg-ctrl-20230923_S475_L001_R1_001.fastq"              
## [181] "rbcL-ext-neg-ctrl-20230924_S476_L001_R1_001.fastq"              
## [182] "rbcL-ext-neg-ctrl-20231007_S477_L001_R1_001.fastq"              
## [183] "rbcL-ext-neg-ctrl-20231008_S478_L001_R1_001.fastq"              
## [184] "rbcL-ext-neg-ctrl-20231009_S479_L001_R1_001.fastq"              
## [185] "rbcL-ext-neg-ctrl-2024220A_S480_L001_R1_001.fastq"              
## [186] "rbcL-ext-neg-ctrl-2024220B_S481_L001_R1_001.fastq"              
## [187] "rbcL-ext-neg-ctrl-2024221A_S482_L001_R1_001.fastq"              
## [188] "rbcL-ext-neg-ctrl-2024221B_S483_L001_R1_001.fastq"              
## [189] "rbcL-ext-neg-ctrl-2024222A_S484_L001_R1_001.fastq"              
## [190] "rbcL-ext-neg-ctrl-2024222B_S485_L001_R1_001.fastq"              
## [191] "rbcL-ext-neg-ctrl-2024312A_S486_L001_R1_001.fastq"              
## [192] "rbcL-ext-neg-ctrl-2024312B_S487_L001_R1_001.fastq"              
## [193] "rbcL-ext-neg-ctrl-2024314A_S488_L001_R1_001.fastq"              
## [194] "rbcL-ext-neg-ctrl-2024314B_S489_L001_R1_001.fastq"              
## [195] "rbcL-ext-neg-ctrl-2024319_S490_L001_R1_001.fastq"               
## [196] "rbcL-ext-neg-ctrl-2024320_S491_L001_R1_001.fastq"               
## [197] "rbcL-KLS0007_S492_L001_R1_001.fastq"                            
## [198] "rbcL-KLS0027_S494_L001_R1_001.fastq"                            
## [199] "rbcL-KLS0044_S495_L001_R1_001.fastq"                            
## [200] "rbcL-KLS0045_S496_L001_R1_001.fastq"                            
## [201] "rbcL-KLS0052_S497_L001_R1_001.fastq"                            
## [202] "rbcL-KLS0054_S498_L001_R1_001.fastq"                            
## [203] "rbcL-KLS0055_S499_L001_R1_001.fastq"                            
## [204] "rbcL-KLS0071_S500_L001_R1_001.fastq"                            
## [205] "rbcL-KLS0095_S501_L001_R1_001.fastq"                            
## [206] "rbcL-KLS0096_S502_L001_R1_001.fastq"                            
## [207] "rbcL-KLS0105_S503_L001_R1_001.fastq"                            
## [208] "rbcL-KLS0106_S504_L001_R1_001.fastq"                            
## [209] "rbcL-KLS0119_S505_L001_R1_001.fastq"                            
## [210] "rbcL-KLS0134_S506_L001_R1_001.fastq"                            
## [211] "rbcL-KLS0135_S507_L001_R1_001.fastq"                            
## [212] "rbcL-KLS0136_S508_L001_R1_001.fastq"                            
## [213] "rbcL-KLS0137_S509_L001_R1_001.fastq"                            
## [214] "rbcL-KLS0138_S510_L001_R1_001.fastq"                            
## [215] "rbcL-KLS0139_S511_L001_R1_001.fastq"                            
## [216] "rbcL-KLS0150_S512_L001_R1_001.fastq"                            
## [217] "rbcL-KLS0153_S513_L001_R1_001.fastq"                            
## [218] "rbcL-KLS0155_S514_L001_R1_001.fastq"                            
## [219] "rbcL-KLS0156_S515_L001_R1_001.fastq"                            
## [220] "rbcL-KLS0159_S516_L001_R1_001.fastq"                            
## [221] "rbcL-KLS0163_S517_L001_R1_001.fastq"                            
## [222] "rbcL-KLS0165_S518_L001_R1_001.fastq"                            
## [223] "rbcL-KLS0167_S519_L001_R1_001.fastq"                            
## [224] "rbcL-KLS0168_S520_L001_R1_001.fastq"                            
## [225] "rbcL-KLS0169_S521_L001_R1_001.fastq"                            
## [226] "rbcL-KLS0170_S522_L001_R1_001.fastq"                            
## [227] "rbcL-KLS0200_S523_L001_R1_001.fastq"                            
## [228] "rbcL-KLS0201_S524_L001_R1_001.fastq"                            
## [229] "rbcL-KLS0205_S525_L001_R1_001.fastq"                            
## [230] "rbcL-KLS0209_S526_L001_R1_001.fastq"                            
## [231] "rbcL-KLS0221_S527_L001_R1_001.fastq"                            
## [232] "rbcL-KLS0224_S528_L001_R1_001.fastq"                            
## [233] "rbcL-KLS0225_S529_L001_R1_001.fastq"                            
## [234] "rbcL-KLS0227_S530_L001_R1_001.fastq"                            
## [235] "rbcL-KLS0241_S531_L001_R1_001.fastq"                            
## [236] "rbcL-KLS0244_S532_L001_R1_001.fastq"                            
## [237] "rbcL-KLS0246_S533_L001_R1_001.fastq"                            
## [238] "rbcL-KLS0248_S534_L001_R1_001.fastq"                            
## [239] "rbcL-KLS0253_S535_L001_R1_001.fastq"                            
## [240] "rbcL-KLS0254_S536_L001_R1_001.fastq"                            
## [241] "rbcL-KLS0256_S493_L001_R1_001.fastq"                            
## [242] "rbcL-KLS0259_S537_L001_R1_001.fastq"                            
## [243] "rbcL-KLS0263_S538_L001_R1_001.fastq"                            
## [244] "rbcL-KLS0266_S539_L001_R1_001.fastq"                            
## [245] "rbcL-KLS0272_S540_L001_R1_001.fastq"                            
## [246] "rbcL-pcr-rbcL-neg-crtl-20240417_S541_L001_R1_001.fastq"         
## [247] "rbcL-pcr-rbcL-neg-ctrl-20240409_S542_L001_R1_001.fastq"         
## [248] "rbcL-pcr-rbcL-neg-ctrl-20240418A_S543_L001_R1_001.fastq"        
## [249] "rbcL-pcr-rbcL-neg-ctrl-20240418B_S544_L001_R1_001.fastq"        
## [250] "rbcL-pcr-rbcL-neg-ctrl-20240523_S545_L001_R1_001.fastq"         
## [251] "rbcL-pcr-rbcL-neg-ctrl-20240531_S546_L001_R1_001.fastq"         
## [252] "rbcL-pcr-rbcL-neg-ctrl-Greeshma-20240416_S547_L001_R1_001.fastq"
## [253] "rbcL-rbcL-pcr-neg-ctrl-20231021-20231119_S548_L001_R1_001.fastq"
## [254] "rbcL-rbcL-pcr-neg-ctrl-20231022-20231120_S549_L001_R1_001.fastq"
## [255] "rbcL-rbcL-pcr-neg-ctrl-20231023-20231121_S550_L001_R1_001.fastq"
## [256] "rbcL-SCA0009_S551_L001_R1_001.fastq"                            
## [257] "rbcL-SCA0010_S552_L001_R1_001.fastq"                            
## [258] "rbcL-SCA0013_S553_L001_R1_001.fastq"
                                  # Split every row name of `out` (defined earlier in the file; the
                                  # row names print as FASTQ file names) at each "_S" match.
                                  # In the printed result, piece 1 is the sample name and piece 2 is
                                  # the "<number>_L001_R1_001.fastq" remainder.
                                  strsplit(
                                    x = rownames(as.data.frame(out)),
                                    split = "_S"
                                  )
## [[1]]
## [1] "rbcL-2020-6-16-H1"     "293_L001_R1_001.fastq"
## 
## [[2]]
## [1] "rbcL-2020-6-16-H5"     "294_L001_R1_001.fastq"
## 
## [[3]]
## [1] "rbcL-2020-6-16-H6"     "295_L001_R1_001.fastq"
## 
## [[4]]
## [1] "rbcL-2020-6-17-H2"     "296_L001_R1_001.fastq"
## 
## [[5]]
## [1] "rbcL-2020-6-17-H4"     "297_L001_R1_001.fastq"
## 
## [[6]]
## [1] "rbcL-2020-6-17-H8"     "298_L001_R1_001.fastq"
## 
## [[7]]
## [1] "rbcL-2020-6-18-H3"     "299_L001_R1_001.fastq"
## 
## [[8]]
## [1] "rbcL-2020-6-18-H7"     "300_L001_R1_001.fastq"
## 
## [[9]]
## [1] "rbcL-2020-6-18-H9"     "301_L001_R1_001.fastq"
## 
## [[10]]
## [1] "rbcL-2020-6-3-H1"      "302_L001_R1_001.fastq"
## 
## [[11]]
## [1] "rbcL-2020-6-3-H5"      "303_L001_R1_001.fastq"
## 
## [[12]]
## [1] "rbcL-2020-6-3-H6"      "304_L001_R1_001.fastq"
## 
## [[13]]
## [1] "rbcL-2020-6-30-H1"     "305_L001_R1_001.fastq"
## 
## [[14]]
## [1] "rbcL-2020-6-30-H5"     "306_L001_R1_001.fastq"
## 
## [[15]]
## [1] "rbcL-2020-6-30-H6"     "307_L001_R1_001.fastq"
## 
## [[16]]
## [1] "rbcL-2020-6-4-H2"      "308_L001_R1_001.fastq"
## 
## [[17]]
## [1] "rbcL-2020-6-4-H4"      "309_L001_R1_001.fastq"
## 
## [[18]]
## [1] "rbcL-2020-6-4-H8"      "310_L001_R1_001.fastq"
## 
## [[19]]
## [1] "rbcL-2020-6-5-H3"      "311_L001_R1_001.fastq"
## 
## [[20]]
## [1] "rbcL-2020-6-5-H7"      "312_L001_R1_001.fastq"
## 
## [[21]]
## [1] "rbcL-2020-6-5-H9"      "313_L001_R1_001.fastq"
## 
## [[22]]
## [1] "rbcL-2020-7-1-H2"      "314_L001_R1_001.fastq"
## 
## [[23]]
## [1] "rbcL-2020-7-1-H4"      "315_L001_R1_001.fastq"
## 
## [[24]]
## [1] "rbcL-2020-7-1-H8"      "316_L001_R1_001.fastq"
## 
## [[25]]
## [1] "rbcL-2020-7-14-H1"     "317_L001_R1_001.fastq"
## 
## [[26]]
## [1] "rbcL-2020-7-14-H5"     "318_L001_R1_001.fastq"
## 
## [[27]]
## [1] "rbcL-2020-7-14-H6"     "319_L001_R1_001.fastq"
## 
## [[28]]
## [1] "rbcL-2020-7-15-H4"     "321_L001_R1_001.fastq"
## 
## [[29]]
## [1] "rbcL-2020-7-15-H8"     "322_L001_R1_001.fastq"
## 
## [[30]]
## [1] "rbcL-2020-7-16-H3"     "323_L001_R1_001.fastq"
## 
## [[31]]
## [1] "rbcL-2020-7-16-H7"     "324_L001_R1_001.fastq"
## 
## [[32]]
## [1] "rbcL-2020-7-16-H9"     "325_L001_R1_001.fastq"
## 
## [[33]]
## [1] "rbcL-2020-7-2-H3"      "326_L001_R1_001.fastq"
## 
## [[34]]
## [1] "rbcL-2020-7-2-H7"      "327_L001_R1_001.fastq"
## 
## [[35]]
## [1] "rbcL-2020-7-2-H9"      "328_L001_R1_001.fastq"
## 
## [[36]]
## [1] "rbcL-2021-6-13-H1"     "329_L001_R1_001.fastq"
## 
## [[37]]
## [1] "rbcL-2021-6-13-H3"     "330_L001_R1_001.fastq"
## 
## [[38]]
## [1] "rbcL-2021-6-14-H11"    "331_L001_R1_001.fastq"
## 
## [[39]]
## [1] "rbcL-2021-6-14-H6"     "332_L001_R1_001.fastq"
## 
## [[40]]
## [1] "rbcL-2021-6-14-H7"     "333_L001_R1_001.fastq"
## 
## [[41]]
## [1] "rbcL-2021-6-15-H8"     "334_L001_R1_001.fastq"
## 
## [[42]]
## [1] "rbcL-2021-6-21-H10"    "335_L001_R1_001.fastq"
## 
## [[43]]
## [1] "rbcL-2021-6-21-H12"    "336_L001_R1_001.fastq"
## 
## [[44]]
## [1] "rbcL-2021-6-21-H9"     "337_L001_R1_001.fastq"
## 
## [[45]]
## [1] "rbcL-2021-6-27-H21"    "338_L001_R1_001.fastq"
## 
## [[46]]
## [1] "rbcL-2021-6-27-H22"    "339_L001_R1_001.fastq"
## 
## [[47]]
## [1] "rbcL-2021-6-27-H27"    "340_L001_R1_001.fastq"
## 
## [[48]]
## [1] "rbcL-2021-6-28-H25"    "341_L001_R1_001.fastq"
## 
## [[49]]
## [1] "rbcL-2021-6-28-H26"    "342_L001_R1_001.fastq"
## 
## [[50]]
## [1] "rbcL-2021-6-28-H28"    "343_L001_R1_001.fastq"
## 
## [[51]]
## [1] "rbcL-2021-6-29-H17"    "344_L001_R1_001.fastq"
## 
## [[52]]
## [1] "rbcL-2021-6-29-H23"    "345_L001_R1_001.fastq"
## 
## [[53]]
## [1] "rbcL-2021-6-29-H24"    "346_L001_R1_001.fastq"
## 
## [[54]]
## [1] "rbcL-2021-6-4-H21"     "347_L001_R1_001.fastq"
## 
## [[55]]
## [1] "rbcL-2021-6-4-H22"     "348_L001_R1_001.fastq"
## 
## [[56]]
## [1] "rbcL-2021-6-4-H27"     "349_L001_R1_001.fastq"
## 
## [[57]]
## [1] "rbcL-2021-6-5-H18"     "350_L001_R1_001.fastq"
## 
## [[58]]
## [1] "rbcL-2021-6-5-H25"     "351_L001_R1_001.fastq"
## 
## [[59]]
## [1] "rbcL-2021-6-5-H26"     "352_L001_R1_001.fastq"
## 
## [[60]]
## [1] "rbcL-2021-6-6-H17"     "353_L001_R1_001.fastq"
## 
## [[61]]
## [1] "rbcL-2021-6-6-H24"     "354_L001_R1_001.fastq"
## 
## [[62]]
## [1] "rbcL-2021-6-7-H23"     "355_L001_R1_001.fastq"
## 
## [[63]]
## [1] "rbcL-2021-7-14-H10"    "356_L001_R1_001.fastq"
## 
## [[64]]
## [1] "rbcL-2021-7-14-H12"    "357_L001_R1_001.fastq"
## 
## [[65]]
## [1] "rbcL-2021-7-20-H27"    "358_L001_R1_001.fastq"
## 
## [[66]]
## [1] "rbcL-2021-7-21-H25"    "359_L001_R1_001.fastq"
## 
## [[67]]
## [1] "rbcL-2021-7-21-H26"    "360_L001_R1_001.fastq"
## 
## [[68]]
## [1] "rbcL-2021-7-6-H11"     "362_L001_R1_001.fastq"
## 
## [[69]]
## [1] "rbcL-2021-7-6-H6"      "364_L001_R1_001.fastq"
## 
## [[70]]
## [1] "rbcL-2021-7-7-H4"      "365_L001_R1_001.fastq"
## 
## [[71]]
## [1] "rbcL-2021-7-7-H8"      "366_L001_R1_001.fastq"
## 
## [[72]]
## [1] "rbcL-2021-7-8-H3"      "367_L001_R1_001.fastq"
## 
## [[73]]
## [1] "rbcL-2023-6-12-H3"     "368_L001_R1_001.fastq"
## 
## [[74]]
## [1] "rbcL-2023-6-12-H5"     "369_L001_R1_001.fastq"
## 
## [[75]]
## [1] "rbcL-2023-6-12-H7"     "370_L001_R1_001.fastq"
## 
## [[76]]
## [1] "rbcL-2023-6-13-H6"     "371_L001_R1_001.fastq"
## 
## [[77]]
## [1] "rbcL-2023-6-13-H8"     "372_L001_R1_001.fastq"
## 
## [[78]]
## [1] "rbcL-2023-6-13-H9"     "373_L001_R1_001.fastq"
## 
## [[79]]
## [1] "rbcL-2023-6-14-H3"     "374_L001_R1_001.fastq"
## 
## [[80]]
## [1] "rbcL-2023-6-14-H7"     "375_L001_R1_001.fastq"
## 
## [[81]]
## [1] "rbcL-2023-6-14-H9"     "376_L001_R1_001.fastq"
## 
## [[82]]
## [1] "rbcL-2023-6-16-H5"     "377_L001_R1_001.fastq"
## 
## [[83]]
## [1] "rbcL-2023-6-24-H6"     "378_L001_R1_001.fastq"
## 
## [[84]]
## [1] "rbcL-2023-6-24-H8"     "379_L001_R1_001.fastq"
## 
## [[85]]
## [1] "rbcL-2023-6-25-H2"     "380_L001_R1_001.fastq"
## 
## [[86]]
## [1] "rbcL-2023-6-25-H4"     "381_L001_R1_001.fastq"
## 
## [[87]]
## [1] "rbcL-2023-6-26-H1"     "382_L001_R1_001.fastq"
## 
## [[88]]
## [1] "rbcL-2023-6-26-H7"     "383_L001_R1_001.fastq"
## 
## [[89]]
## [1] "rbcL-2023-6-27-H3"     "384_L001_R1_001.fastq"
## 
## [[90]]
## [1] "rbcL-2023-6-27-H5"     "385_L001_R1_001.fastq"
## 
## [[91]]
## [1] "rbcL-2023-6-8-H1"      "386_L001_R1_001.fastq"
## 
## [[92]]
## [1] "rbcL-2023-6-8-H2"      "387_L001_R1_001.fastq"
## 
## [[93]]
## [1] "rbcL-2023-6-8-H4"      "388_L001_R1_001.fastq"
## 
## [[94]]
## [1] "rbcL-2023-6-9-H2"      "389_L001_R1_001.fastq"
## 
## [[95]]
## [1] "rbcL-2023-6-9-H4"      "390_L001_R1_001.fastq"
## 
## [[96]]
## [1] "rbcL-2023-7-15-H6"     "391_L001_R1_001.fastq"
## 
## [[97]]
## [1] "rbcL-2023-7-16-H4"     "392_L001_R1_001.fastq"
## 
## [[98]]
## [1] "rbcL-2023-7-17-H1"     "393_L001_R1_001.fastq"
## 
## [[99]]
## [1] "rbcL-2023-7-18-H3"     "394_L001_R1_001.fastq"
## 
## [[100]]
## [1] "rbcL-2023-7-18-H7"     "395_L001_R1_001.fastq"
## 
## [[101]]
## [1] "rbcL-2023-7-29-H5"     "396_L001_R1_001.fastq"
## 
## [[102]]
## [1] "rbcL-2023-7-29-H7"     "397_L001_R1_001.fastq"
## 
## [[103]]
## [1] "rbcL-2023-7-30-H8"     "398_L001_R1_001.fastq"
## 
## [[104]]
## [1] "rbcL-2023-7-30-H9"     "399_L001_R1_001.fastq"
## 
## [[105]]
## [1] "rbcL-2023-7-5-H1"      "400_L001_R1_001.fastq"
## 
## [[106]]
## [1] "rbcL-2023-7-5-H2"      "401_L001_R1_001.fastq"
## 
## [[107]]
## [1] "rbcL-2023-7-5-H4"      "402_L001_R1_001.fastq"
## 
## [[108]]
## [1] "rbcL-2023-7-6-H6"      "403_L001_R1_001.fastq"
## 
## [[109]]
## [1] "rbcL-2023-7-6-H8"      "404_L001_R1_001.fastq"
## 
## [[110]]
## [1] "rbcL-2023-7-6-H9"      "405_L001_R1_001.fastq"
## 
## [[111]]
## [1] "rbcL-2023-7-8-H3"      "406_L001_R1_001.fastq"
## 
## [[112]]
## [1] "rbcL-2023-7-8-H5"      "407_L001_R1_001.fastq"
## 
## [[113]]
## [1] "rbcL-2023-7-8-H7"      "408_L001_R1_001.fastq"
## 
## [[114]]
## [1] "rbcL-2023-8-4-H2"      "409_L001_R1_001.fastq"
## 
## [[115]]
## [1] "rbcL-2023-8-4-H5"      "410_L001_R1_001.fastq"
## 
## [[116]]
## [1] "rbcL-2023-8-4-H6"      "411_L001_R1_001.fastq"
## 
## [[117]]
## [1] "rbcL-2023-8-4-H7"      "412_L001_R1_001.fastq"
## 
## [[118]]
## [1] "rbcL-2023-8-4-H8"      "413_L001_R1_001.fastq"
## 
## [[119]]
## [1] "rbcL-2023-8-4-H9"      "414_L001_R1_001.fastq"
## 
## [[120]]
## [1] "rbcL-Ba001"            "415_L001_R1_001.fastq"
## 
## [[121]]
## [1] "rbcL-Ba002"            "416_L001_R1_001.fastq"
## 
## [[122]]
## [1] "rbcL-Ba003"            "417_L001_R1_001.fastq"
## 
## [[123]]
## [1] "rbcL-Bb001"            "418_L001_R1_001.fastq"
## 
## [[124]]
## [1] "rbcL-Bb002"            "419_L001_R1_001.fastq"
## 
## [[125]]
## [1] "rbcL-Bb003"            "420_L001_R1_001.fastq"
## 
## [[126]]
## [1] "rbcL-Bb004"            "421_L001_R1_001.fastq"
## 
## [[127]]
## [1] "rbcL-Bb005"            "422_L001_R1_001.fastq"
## 
## [[128]]
## [1] "rbcL-Bb007"            "423_L001_R1_001.fastq"
## 
## [[129]]
## [1] "rbcL-Bb008"            "424_L001_R1_001.fastq"
## 
## [[130]]
## [1] "rbcL-Bb009"            "425_L001_R1_001.fastq"
## 
## [[131]]
## [1] "rbcL-Bb010"            "426_L001_R1_001.fastq"
## 
## [[132]]
## [1] "rbcL-Bb011"            "427_L001_R1_001.fastq"
## 
## [[133]]
## [1] "rbcL-Bb012"            "428_L001_R1_001.fastq"
## 
## [[134]]
## [1] "rbcL-Bb013"            "429_L001_R1_001.fastq"
## 
## [[135]]
## [1] "rbcL-Bb014"            "430_L001_R1_001.fastq"
## 
## [[136]]
## [1] "rbcL-Bb015"            "431_L001_R1_001.fastq"
## 
## [[137]]
## [1] "rbcL-Bb016"            "432_L001_R1_001.fastq"
## 
## [[138]]
## [1] "rbcL-Bb017"            "433_L001_R1_001.fastq"
## 
## [[139]]
## [1] "rbcL-Bb018"            "434_L001_R1_001.fastq"
## 
## [[140]]
## [1] "rbcL-Bb019"            "435_L001_R1_001.fastq"
## 
## [[141]]
## [1] "rbcL-Bb020"            "436_L001_R1_001.fastq"
## 
## [[142]]
## [1] "rbcL-Bb021"            "437_L001_R1_001.fastq"
## 
## [[143]]
## [1] "rbcL-Bb022"            "438_L001_R1_001.fastq"
## 
## [[144]]
## [1] "rbcL-Bb023"            "439_L001_R1_001.fastq"
## 
## [[145]]
## [1] "rbcL-Bb024"            "440_L001_R1_001.fastq"
## 
## [[146]]
## [1] "rbcL-Bb025"            "441_L001_R1_001.fastq"
## 
## [[147]]
## [1] "rbcL-Bf001"            "442_L001_R1_001.fastq"
## 
## [[148]]
## [1] "rbcL-Bf002"            "443_L001_R1_001.fastq"
## 
## [[149]]
## [1] "rbcL-Bf003"            "444_L001_R1_001.fastq"
## 
## [[150]]
## [1] "rbcL-Bf004"            "445_L001_R1_001.fastq"
## 
## [[151]]
## [1] "rbcL-Bg001"            "446_L001_R1_001.fastq"
## 
## [[152]]
## [1] "rbcL-Bg002"            "447_L001_R1_001.fastq"
## 
## [[153]]
## [1] "rbcL-Bg003"            "448_L001_R1_001.fastq"
## 
## [[154]]
## [1] "rbcL-Bg004"            "449_L001_R1_001.fastq"
## 
## [[155]]
## [1] "rbcL-Bg005"            "450_L001_R1_001.fastq"
## 
## [[156]]
## [1] "rbcL-Bg006"            "451_L001_R1_001.fastq"
## 
## [[157]]
## [1] "rbcL-Bg007"            "452_L001_R1_001.fastq"
## 
## [[158]]
## [1] "rbcL-Bg008"            "453_L001_R1_001.fastq"
## 
## [[159]]
## [1] "rbcL-Bg009"            "454_L001_R1_001.fastq"
## 
## [[160]]
## [1] "rbcL-Bg010"            "455_L001_R1_001.fastq"
## 
## [[161]]
## [1] "rbcL-Bg011"            "456_L001_R1_001.fastq"
## 
## [[162]]
## [1] "rbcL-Bg012"            "457_L001_R1_001.fastq"
## 
## [[163]]
## [1] "rbcL-Bg013"            "458_L001_R1_001.fastq"
## 
## [[164]]
## [1] "rbcL-Bg014"            "459_L001_R1_001.fastq"
## 
## [[165]]
## [1] "rbcL-Bg015"            "460_L001_R1_001.fastq"
## 
## [[166]]
## [1] "rbcL-Bg016"            "461_L001_R1_001.fastq"
## 
## [[167]]
## [1] "rbcL-Bg017"            "462_L001_R1_001.fastq"
## 
## [[168]]
## [1] "rbcL-Bg018"            "463_L001_R1_001.fastq"
## 
## [[169]]
## [1] "rbcL-Bg019"            "464_L001_R1_001.fastq"
## 
## [[170]]
## [1] "rbcL-Bi001"            "465_L001_R1_001.fastq"
## 
## [[171]]
## [1] "rbcL-Bi002"            "466_L001_R1_001.fastq"
## 
## [[172]]
## [1] "rbcL-Bi003"            "467_L001_R1_001.fastq"
## 
## [[173]]
## [1] "rbcL-Bi004"            "468_L001_R1_001.fastq"
## 
## [[174]]
## [1] "rbcL-Bi005"            "469_L001_R1_001.fastq"
## 
## [[175]]
## [1] "rbcL-Bi006"            "470_L001_R1_001.fastq"
## 
## [[176]]
## [1] "rbcL-Bi007"            "471_L001_R1_001.fastq"
## 
## [[177]]
## [1] "rbcL-CKC0001"          "472_L001_R1_001.fastq"
## 
## [[178]]
## [1] "rbcL-ESE0004"          "473_L001_R1_001.fastq"
## 
## [[179]]
## [1] "rbcL-ext-neg-ctrl-20230909" "474_L001_R1_001.fastq"     
## 
## [[180]]
## [1] "rbcL-ext-neg-ctrl-20230923" "475_L001_R1_001.fastq"     
## 
## [[181]]
## [1] "rbcL-ext-neg-ctrl-20230924" "476_L001_R1_001.fastq"     
## 
## [[182]]
## [1] "rbcL-ext-neg-ctrl-20231007" "477_L001_R1_001.fastq"     
## 
## [[183]]
## [1] "rbcL-ext-neg-ctrl-20231008" "478_L001_R1_001.fastq"     
## 
## [[184]]
## [1] "rbcL-ext-neg-ctrl-20231009" "479_L001_R1_001.fastq"     
## 
## [[185]]
## [1] "rbcL-ext-neg-ctrl-2024220A" "480_L001_R1_001.fastq"     
## 
## [[186]]
## [1] "rbcL-ext-neg-ctrl-2024220B" "481_L001_R1_001.fastq"     
## 
## [[187]]
## [1] "rbcL-ext-neg-ctrl-2024221A" "482_L001_R1_001.fastq"     
## 
## [[188]]
## [1] "rbcL-ext-neg-ctrl-2024221B" "483_L001_R1_001.fastq"     
## 
## [[189]]
## [1] "rbcL-ext-neg-ctrl-2024222A" "484_L001_R1_001.fastq"     
## 
## [[190]]
## [1] "rbcL-ext-neg-ctrl-2024222B" "485_L001_R1_001.fastq"     
## 
## [[191]]
## [1] "rbcL-ext-neg-ctrl-2024312A" "486_L001_R1_001.fastq"     
## 
## [[192]]
## [1] "rbcL-ext-neg-ctrl-2024312B" "487_L001_R1_001.fastq"     
## 
## [[193]]
## [1] "rbcL-ext-neg-ctrl-2024314A" "488_L001_R1_001.fastq"     
## 
## [[194]]
## [1] "rbcL-ext-neg-ctrl-2024314B" "489_L001_R1_001.fastq"     
## 
## [[195]]
## [1] "rbcL-ext-neg-ctrl-2024319" "490_L001_R1_001.fastq"    
## 
## [[196]]
## [1] "rbcL-ext-neg-ctrl-2024320" "491_L001_R1_001.fastq"    
## 
## [[197]]
## [1] "rbcL-KLS0007"          "492_L001_R1_001.fastq"
## 
## [[198]]
## [1] "rbcL-KLS0027"          "494_L001_R1_001.fastq"
## 
## [[199]]
## [1] "rbcL-KLS0044"          "495_L001_R1_001.fastq"
## 
## [[200]]
## [1] "rbcL-KLS0045"          "496_L001_R1_001.fastq"
## 
## [[201]]
## [1] "rbcL-KLS0052"          "497_L001_R1_001.fastq"
## 
## [[202]]
## [1] "rbcL-KLS0054"          "498_L001_R1_001.fastq"
## 
## [[203]]
## [1] "rbcL-KLS0055"          "499_L001_R1_001.fastq"
## 
## [[204]]
## [1] "rbcL-KLS0071"          "500_L001_R1_001.fastq"
## 
## [[205]]
## [1] "rbcL-KLS0095"          "501_L001_R1_001.fastq"
## 
## [[206]]
## [1] "rbcL-KLS0096"          "502_L001_R1_001.fastq"
## 
## [[207]]
## [1] "rbcL-KLS0105"          "503_L001_R1_001.fastq"
## 
## [[208]]
## [1] "rbcL-KLS0106"          "504_L001_R1_001.fastq"
## 
## [[209]]
## [1] "rbcL-KLS0119"          "505_L001_R1_001.fastq"
## 
## [[210]]
## [1] "rbcL-KLS0134"          "506_L001_R1_001.fastq"
## 
## [[211]]
## [1] "rbcL-KLS0135"          "507_L001_R1_001.fastq"
## 
## [[212]]
## [1] "rbcL-KLS0136"          "508_L001_R1_001.fastq"
## 
## [[213]]
## [1] "rbcL-KLS0137"          "509_L001_R1_001.fastq"
## 
## [[214]]
## [1] "rbcL-KLS0138"          "510_L001_R1_001.fastq"
## 
## [[215]]
## [1] "rbcL-KLS0139"          "511_L001_R1_001.fastq"
## 
## [[216]]
## [1] "rbcL-KLS0150"          "512_L001_R1_001.fastq"
## 
## [[217]]
## [1] "rbcL-KLS0153"          "513_L001_R1_001.fastq"
## 
## [[218]]
## [1] "rbcL-KLS0155"          "514_L001_R1_001.fastq"
## 
## [[219]]
## [1] "rbcL-KLS0156"          "515_L001_R1_001.fastq"
## 
## [[220]]
## [1] "rbcL-KLS0159"          "516_L001_R1_001.fastq"
## 
## [[221]]
## [1] "rbcL-KLS0163"          "517_L001_R1_001.fastq"
## 
## [[222]]
## [1] "rbcL-KLS0165"          "518_L001_R1_001.fastq"
## 
## [[223]]
## [1] "rbcL-KLS0167"          "519_L001_R1_001.fastq"
## 
## [[224]]
## [1] "rbcL-KLS0168"          "520_L001_R1_001.fastq"
## 
## [[225]]
## [1] "rbcL-KLS0169"          "521_L001_R1_001.fastq"
## 
## [[226]]
## [1] "rbcL-KLS0170"          "522_L001_R1_001.fastq"
## 
## [[227]]
## [1] "rbcL-KLS0200"          "523_L001_R1_001.fastq"
## 
## [[228]]
## [1] "rbcL-KLS0201"          "524_L001_R1_001.fastq"
## 
## [[229]]
## [1] "rbcL-KLS0205"          "525_L001_R1_001.fastq"
## 
## [[230]]
## [1] "rbcL-KLS0209"          "526_L001_R1_001.fastq"
## 
## [[231]]
## [1] "rbcL-KLS0221"          "527_L001_R1_001.fastq"
## 
## [[232]]
## [1] "rbcL-KLS0224"          "528_L001_R1_001.fastq"
## 
## [[233]]
## [1] "rbcL-KLS0225"          "529_L001_R1_001.fastq"
## 
## [[234]]
## [1] "rbcL-KLS0227"          "530_L001_R1_001.fastq"
## 
## [[235]]
## [1] "rbcL-KLS0241"          "531_L001_R1_001.fastq"
## 
## [[236]]
## [1] "rbcL-KLS0244"          "532_L001_R1_001.fastq"
## 
## [[237]]
## [1] "rbcL-KLS0246"          "533_L001_R1_001.fastq"
## 
## [[238]]
## [1] "rbcL-KLS0248"          "534_L001_R1_001.fastq"
## 
## [[239]]
## [1] "rbcL-KLS0253"          "535_L001_R1_001.fastq"
## 
## [[240]]
## [1] "rbcL-KLS0254"          "536_L001_R1_001.fastq"
## 
## [[241]]
## [1] "rbcL-KLS0256"          "493_L001_R1_001.fastq"
## 
## [[242]]
## [1] "rbcL-KLS0259"          "537_L001_R1_001.fastq"
## 
## [[243]]
## [1] "rbcL-KLS0263"          "538_L001_R1_001.fastq"
## 
## [[244]]
## [1] "rbcL-KLS0266"          "539_L001_R1_001.fastq"
## 
## [[245]]
## [1] "rbcL-KLS0272"          "540_L001_R1_001.fastq"
## 
## [[246]]
## [1] "rbcL-pcr-rbcL-neg-crtl-20240417" "541_L001_R1_001.fastq"          
## 
## [[247]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240409" "542_L001_R1_001.fastq"          
## 
## [[248]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240418A" "543_L001_R1_001.fastq"           
## 
## [[249]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240418B" "544_L001_R1_001.fastq"           
## 
## [[250]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240523" "545_L001_R1_001.fastq"          
## 
## [[251]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240531" "546_L001_R1_001.fastq"          
## 
## [[252]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-Greeshma-20240416"
## [2] "547_L001_R1_001.fastq"                   
## 
## [[253]]
## [1] "rbcL-rbcL-pcr-neg-ctrl-20231021-20231119"
## [2] "548_L001_R1_001.fastq"                   
## 
## [[254]]
## [1] "rbcL-rbcL-pcr-neg-ctrl-20231022-20231120"
## [2] "549_L001_R1_001.fastq"                   
## 
## [[255]]
## [1] "rbcL-rbcL-pcr-neg-ctrl-20231023-20231121"
## [2] "550_L001_R1_001.fastq"                   
## 
## [[256]]
## [1] "rbcL-SCA0009"          "551_L001_R1_001.fastq"
## 
## [[257]]
## [1] "rbcL-SCA0010"          "552_L001_R1_001.fastq"
## 
## [[258]]
## [1] "rbcL-SCA0013"          "553_L001_R1_001.fastq"
                           # Keep only the first piece of each "_S" split of the row names of
                           # `out` (defined earlier in the file), i.e. the part of each file
                           # name before the first "_S". The result stays a list, one element
                           # per row name.
                           lapply(
                             strsplit(x = rownames(as.data.frame(out)), split = "_S"),
                             `[[`,
                             1
                           )
## [[1]]
## [1] "rbcL-2020-6-16-H1"
## 
## [[2]]
## [1] "rbcL-2020-6-16-H5"
## 
## [[3]]
## [1] "rbcL-2020-6-16-H6"
## 
## [[4]]
## [1] "rbcL-2020-6-17-H2"
## 
## [[5]]
## [1] "rbcL-2020-6-17-H4"
## 
## [[6]]
## [1] "rbcL-2020-6-17-H8"
## 
## [[7]]
## [1] "rbcL-2020-6-18-H3"
## 
## [[8]]
## [1] "rbcL-2020-6-18-H7"
## 
## [[9]]
## [1] "rbcL-2020-6-18-H9"
## 
## [[10]]
## [1] "rbcL-2020-6-3-H1"
## 
## [[11]]
## [1] "rbcL-2020-6-3-H5"
## 
## [[12]]
## [1] "rbcL-2020-6-3-H6"
## 
## [[13]]
## [1] "rbcL-2020-6-30-H1"
## 
## [[14]]
## [1] "rbcL-2020-6-30-H5"
## 
## [[15]]
## [1] "rbcL-2020-6-30-H6"
## 
## [[16]]
## [1] "rbcL-2020-6-4-H2"
## 
## [[17]]
## [1] "rbcL-2020-6-4-H4"
## 
## [[18]]
## [1] "rbcL-2020-6-4-H8"
## 
## [[19]]
## [1] "rbcL-2020-6-5-H3"
## 
## [[20]]
## [1] "rbcL-2020-6-5-H7"
## 
## [[21]]
## [1] "rbcL-2020-6-5-H9"
## 
## [[22]]
## [1] "rbcL-2020-7-1-H2"
## 
## [[23]]
## [1] "rbcL-2020-7-1-H4"
## 
## [[24]]
## [1] "rbcL-2020-7-1-H8"
## 
## [[25]]
## [1] "rbcL-2020-7-14-H1"
## 
## [[26]]
## [1] "rbcL-2020-7-14-H5"
## 
## [[27]]
## [1] "rbcL-2020-7-14-H6"
## 
## [[28]]
## [1] "rbcL-2020-7-15-H4"
## 
## [[29]]
## [1] "rbcL-2020-7-15-H8"
## 
## [[30]]
## [1] "rbcL-2020-7-16-H3"
## 
## [[31]]
## [1] "rbcL-2020-7-16-H7"
## 
## [[32]]
## [1] "rbcL-2020-7-16-H9"
## 
## [[33]]
## [1] "rbcL-2020-7-2-H3"
## 
## [[34]]
## [1] "rbcL-2020-7-2-H7"
## 
## [[35]]
## [1] "rbcL-2020-7-2-H9"
## 
## [[36]]
## [1] "rbcL-2021-6-13-H1"
## 
## [[37]]
## [1] "rbcL-2021-6-13-H3"
## 
## [[38]]
## [1] "rbcL-2021-6-14-H11"
## 
## [[39]]
## [1] "rbcL-2021-6-14-H6"
## 
## [[40]]
## [1] "rbcL-2021-6-14-H7"
## 
## [[41]]
## [1] "rbcL-2021-6-15-H8"
## 
## [[42]]
## [1] "rbcL-2021-6-21-H10"
## 
## [[43]]
## [1] "rbcL-2021-6-21-H12"
## 
## [[44]]
## [1] "rbcL-2021-6-21-H9"
## 
## [[45]]
## [1] "rbcL-2021-6-27-H21"
## 
## [[46]]
## [1] "rbcL-2021-6-27-H22"
## 
## [[47]]
## [1] "rbcL-2021-6-27-H27"
## 
## [[48]]
## [1] "rbcL-2021-6-28-H25"
## 
## [[49]]
## [1] "rbcL-2021-6-28-H26"
## 
## [[50]]
## [1] "rbcL-2021-6-28-H28"
## 
## [[51]]
## [1] "rbcL-2021-6-29-H17"
## 
## [[52]]
## [1] "rbcL-2021-6-29-H23"
## 
## [[53]]
## [1] "rbcL-2021-6-29-H24"
## 
## [[54]]
## [1] "rbcL-2021-6-4-H21"
## 
## [[55]]
## [1] "rbcL-2021-6-4-H22"
## 
## [[56]]
## [1] "rbcL-2021-6-4-H27"
## 
## [[57]]
## [1] "rbcL-2021-6-5-H18"
## 
## [[58]]
## [1] "rbcL-2021-6-5-H25"
## 
## [[59]]
## [1] "rbcL-2021-6-5-H26"
## 
## [[60]]
## [1] "rbcL-2021-6-6-H17"
## 
## [[61]]
## [1] "rbcL-2021-6-6-H24"
## 
## [[62]]
## [1] "rbcL-2021-6-7-H23"
## 
## [[63]]
## [1] "rbcL-2021-7-14-H10"
## 
## [[64]]
## [1] "rbcL-2021-7-14-H12"
## 
## [[65]]
## [1] "rbcL-2021-7-20-H27"
## 
## [[66]]
## [1] "rbcL-2021-7-21-H25"
## 
## [[67]]
## [1] "rbcL-2021-7-21-H26"
## 
## [[68]]
## [1] "rbcL-2021-7-6-H11"
## 
## [[69]]
## [1] "rbcL-2021-7-6-H6"
## 
## [[70]]
## [1] "rbcL-2021-7-7-H4"
## 
## [[71]]
## [1] "rbcL-2021-7-7-H8"
## 
## [[72]]
## [1] "rbcL-2021-7-8-H3"
## 
## [[73]]
## [1] "rbcL-2023-6-12-H3"
## 
## [[74]]
## [1] "rbcL-2023-6-12-H5"
## 
## [[75]]
## [1] "rbcL-2023-6-12-H7"
## 
## [[76]]
## [1] "rbcL-2023-6-13-H6"
## 
## [[77]]
## [1] "rbcL-2023-6-13-H8"
## 
## [[78]]
## [1] "rbcL-2023-6-13-H9"
## 
## [[79]]
## [1] "rbcL-2023-6-14-H3"
## 
## [[80]]
## [1] "rbcL-2023-6-14-H7"
## 
## [[81]]
## [1] "rbcL-2023-6-14-H9"
## 
## [[82]]
## [1] "rbcL-2023-6-16-H5"
## 
## [[83]]
## [1] "rbcL-2023-6-24-H6"
## 
## [[84]]
## [1] "rbcL-2023-6-24-H8"
## 
## [[85]]
## [1] "rbcL-2023-6-25-H2"
## 
## [[86]]
## [1] "rbcL-2023-6-25-H4"
## 
## [[87]]
## [1] "rbcL-2023-6-26-H1"
## 
## [[88]]
## [1] "rbcL-2023-6-26-H7"
## 
## [[89]]
## [1] "rbcL-2023-6-27-H3"
## 
## [[90]]
## [1] "rbcL-2023-6-27-H5"
## 
## [[91]]
## [1] "rbcL-2023-6-8-H1"
## 
## [[92]]
## [1] "rbcL-2023-6-8-H2"
## 
## [[93]]
## [1] "rbcL-2023-6-8-H4"
## 
## [[94]]
## [1] "rbcL-2023-6-9-H2"
## 
## [[95]]
## [1] "rbcL-2023-6-9-H4"
## 
## [[96]]
## [1] "rbcL-2023-7-15-H6"
## 
## [[97]]
## [1] "rbcL-2023-7-16-H4"
## 
## [[98]]
## [1] "rbcL-2023-7-17-H1"
## 
## [[99]]
## [1] "rbcL-2023-7-18-H3"
## 
## [[100]]
## [1] "rbcL-2023-7-18-H7"
## 
## [[101]]
## [1] "rbcL-2023-7-29-H5"
## 
## [[102]]
## [1] "rbcL-2023-7-29-H7"
## 
## [[103]]
## [1] "rbcL-2023-7-30-H8"
## 
## [[104]]
## [1] "rbcL-2023-7-30-H9"
## 
## [[105]]
## [1] "rbcL-2023-7-5-H1"
## 
## [[106]]
## [1] "rbcL-2023-7-5-H2"
## 
## [[107]]
## [1] "rbcL-2023-7-5-H4"
## 
## [[108]]
## [1] "rbcL-2023-7-6-H6"
## 
## [[109]]
## [1] "rbcL-2023-7-6-H8"
## 
## [[110]]
## [1] "rbcL-2023-7-6-H9"
## 
## [[111]]
## [1] "rbcL-2023-7-8-H3"
## 
## [[112]]
## [1] "rbcL-2023-7-8-H5"
## 
## [[113]]
## [1] "rbcL-2023-7-8-H7"
## 
## [[114]]
## [1] "rbcL-2023-8-4-H2"
## 
## [[115]]
## [1] "rbcL-2023-8-4-H5"
## 
## [[116]]
## [1] "rbcL-2023-8-4-H6"
## 
## [[117]]
## [1] "rbcL-2023-8-4-H7"
## 
## [[118]]
## [1] "rbcL-2023-8-4-H8"
## 
## [[119]]
## [1] "rbcL-2023-8-4-H9"
## 
## [[120]]
## [1] "rbcL-Ba001"
## 
## [[121]]
## [1] "rbcL-Ba002"
## 
## [[122]]
## [1] "rbcL-Ba003"
## 
## [[123]]
## [1] "rbcL-Bb001"
## 
## [[124]]
## [1] "rbcL-Bb002"
## 
## [[125]]
## [1] "rbcL-Bb003"
## 
## [[126]]
## [1] "rbcL-Bb004"
## 
## [[127]]
## [1] "rbcL-Bb005"
## 
## [[128]]
## [1] "rbcL-Bb007"
## 
## [[129]]
## [1] "rbcL-Bb008"
## 
## [[130]]
## [1] "rbcL-Bb009"
## 
## [[131]]
## [1] "rbcL-Bb010"
## 
## [[132]]
## [1] "rbcL-Bb011"
## 
## [[133]]
## [1] "rbcL-Bb012"
## 
## [[134]]
## [1] "rbcL-Bb013"
## 
## [[135]]
## [1] "rbcL-Bb014"
## 
## [[136]]
## [1] "rbcL-Bb015"
## 
## [[137]]
## [1] "rbcL-Bb016"
## 
## [[138]]
## [1] "rbcL-Bb017"
## 
## [[139]]
## [1] "rbcL-Bb018"
## 
## [[140]]
## [1] "rbcL-Bb019"
## 
## [[141]]
## [1] "rbcL-Bb020"
## 
## [[142]]
## [1] "rbcL-Bb021"
## 
## [[143]]
## [1] "rbcL-Bb022"
## 
## [[144]]
## [1] "rbcL-Bb023"
## 
## [[145]]
## [1] "rbcL-Bb024"
## 
## [[146]]
## [1] "rbcL-Bb025"
## 
## [[147]]
## [1] "rbcL-Bf001"
## 
## [[148]]
## [1] "rbcL-Bf002"
## 
## [[149]]
## [1] "rbcL-Bf003"
## 
## [[150]]
## [1] "rbcL-Bf004"
## 
## [[151]]
## [1] "rbcL-Bg001"
## 
## [[152]]
## [1] "rbcL-Bg002"
## 
## [[153]]
## [1] "rbcL-Bg003"
## 
## [[154]]
## [1] "rbcL-Bg004"
## 
## [[155]]
## [1] "rbcL-Bg005"
## 
## [[156]]
## [1] "rbcL-Bg006"
## 
## [[157]]
## [1] "rbcL-Bg007"
## 
## [[158]]
## [1] "rbcL-Bg008"
## 
## [[159]]
## [1] "rbcL-Bg009"
## 
## [[160]]
## [1] "rbcL-Bg010"
## 
## [[161]]
## [1] "rbcL-Bg011"
## 
## [[162]]
## [1] "rbcL-Bg012"
## 
## [[163]]
## [1] "rbcL-Bg013"
## 
## [[164]]
## [1] "rbcL-Bg014"
## 
## [[165]]
## [1] "rbcL-Bg015"
## 
## [[166]]
## [1] "rbcL-Bg016"
## 
## [[167]]
## [1] "rbcL-Bg017"
## 
## [[168]]
## [1] "rbcL-Bg018"
## 
## [[169]]
## [1] "rbcL-Bg019"
## 
## [[170]]
## [1] "rbcL-Bi001"
## 
## [[171]]
## [1] "rbcL-Bi002"
## 
## [[172]]
## [1] "rbcL-Bi003"
## 
## [[173]]
## [1] "rbcL-Bi004"
## 
## [[174]]
## [1] "rbcL-Bi005"
## 
## [[175]]
## [1] "rbcL-Bi006"
## 
## [[176]]
## [1] "rbcL-Bi007"
## 
## [[177]]
## [1] "rbcL-CKC0001"
## 
## [[178]]
## [1] "rbcL-ESE0004"
## 
## [[179]]
## [1] "rbcL-ext-neg-ctrl-20230909"
## 
## [[180]]
## [1] "rbcL-ext-neg-ctrl-20230923"
## 
## [[181]]
## [1] "rbcL-ext-neg-ctrl-20230924"
## 
## [[182]]
## [1] "rbcL-ext-neg-ctrl-20231007"
## 
## [[183]]
## [1] "rbcL-ext-neg-ctrl-20231008"
## 
## [[184]]
## [1] "rbcL-ext-neg-ctrl-20231009"
## 
## [[185]]
## [1] "rbcL-ext-neg-ctrl-2024220A"
## 
## [[186]]
## [1] "rbcL-ext-neg-ctrl-2024220B"
## 
## [[187]]
## [1] "rbcL-ext-neg-ctrl-2024221A"
## 
## [[188]]
## [1] "rbcL-ext-neg-ctrl-2024221B"
## 
## [[189]]
## [1] "rbcL-ext-neg-ctrl-2024222A"
## 
## [[190]]
## [1] "rbcL-ext-neg-ctrl-2024222B"
## 
## [[191]]
## [1] "rbcL-ext-neg-ctrl-2024312A"
## 
## [[192]]
## [1] "rbcL-ext-neg-ctrl-2024312B"
## 
## [[193]]
## [1] "rbcL-ext-neg-ctrl-2024314A"
## 
## [[194]]
## [1] "rbcL-ext-neg-ctrl-2024314B"
## 
## [[195]]
## [1] "rbcL-ext-neg-ctrl-2024319"
## 
## [[196]]
## [1] "rbcL-ext-neg-ctrl-2024320"
## 
## [[197]]
## [1] "rbcL-KLS0007"
## 
## [[198]]
## [1] "rbcL-KLS0027"
## 
## [[199]]
## [1] "rbcL-KLS0044"
## 
## [[200]]
## [1] "rbcL-KLS0045"
## 
## [[201]]
## [1] "rbcL-KLS0052"
## 
## [[202]]
## [1] "rbcL-KLS0054"
## 
## [[203]]
## [1] "rbcL-KLS0055"
## 
## [[204]]
## [1] "rbcL-KLS0071"
## 
## [[205]]
## [1] "rbcL-KLS0095"
## 
## [[206]]
## [1] "rbcL-KLS0096"
## 
## [[207]]
## [1] "rbcL-KLS0105"
## 
## [[208]]
## [1] "rbcL-KLS0106"
## 
## [[209]]
## [1] "rbcL-KLS0119"
## 
## [[210]]
## [1] "rbcL-KLS0134"
## 
## [[211]]
## [1] "rbcL-KLS0135"
## 
## [[212]]
## [1] "rbcL-KLS0136"
## 
## [[213]]
## [1] "rbcL-KLS0137"
## 
## [[214]]
## [1] "rbcL-KLS0138"
## 
## [[215]]
## [1] "rbcL-KLS0139"
## 
## [[216]]
## [1] "rbcL-KLS0150"
## 
## [[217]]
## [1] "rbcL-KLS0153"
## 
## [[218]]
## [1] "rbcL-KLS0155"
## 
## [[219]]
## [1] "rbcL-KLS0156"
## 
## [[220]]
## [1] "rbcL-KLS0159"
## 
## [[221]]
## [1] "rbcL-KLS0163"
## 
## [[222]]
## [1] "rbcL-KLS0165"
## 
## [[223]]
## [1] "rbcL-KLS0167"
## 
## [[224]]
## [1] "rbcL-KLS0168"
## 
## [[225]]
## [1] "rbcL-KLS0169"
## 
## [[226]]
## [1] "rbcL-KLS0170"
## 
## [[227]]
## [1] "rbcL-KLS0200"
## 
## [[228]]
## [1] "rbcL-KLS0201"
## 
## [[229]]
## [1] "rbcL-KLS0205"
## 
## [[230]]
## [1] "rbcL-KLS0209"
## 
## [[231]]
## [1] "rbcL-KLS0221"
## 
## [[232]]
## [1] "rbcL-KLS0224"
## 
## [[233]]
## [1] "rbcL-KLS0225"
## 
## [[234]]
## [1] "rbcL-KLS0227"
## 
## [[235]]
## [1] "rbcL-KLS0241"
## 
## [[236]]
## [1] "rbcL-KLS0244"
## 
## [[237]]
## [1] "rbcL-KLS0246"
## 
## [[238]]
## [1] "rbcL-KLS0248"
## 
## [[239]]
## [1] "rbcL-KLS0253"
## 
## [[240]]
## [1] "rbcL-KLS0254"
## 
## [[241]]
## [1] "rbcL-KLS0256"
## 
## [[242]]
## [1] "rbcL-KLS0259"
## 
## [[243]]
## [1] "rbcL-KLS0263"
## 
## [[244]]
## [1] "rbcL-KLS0266"
## 
## [[245]]
## [1] "rbcL-KLS0272"
## 
## [[246]]
## [1] "rbcL-pcr-rbcL-neg-crtl-20240417"
## 
## [[247]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240409"
## 
## [[248]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240418A"
## 
## [[249]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240418B"
## 
## [[250]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240523"
## 
## [[251]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240531"
## 
## [[252]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-Greeshma-20240416"
## 
## [[253]]
## [1] "rbcL-rbcL-pcr-neg-ctrl-20231021-20231119"
## 
## [[254]]
## [1] "rbcL-rbcL-pcr-neg-ctrl-20231022-20231120"
## 
## [[255]]
## [1] "rbcL-rbcL-pcr-neg-ctrl-20231023-20231121"
## 
## [[256]]
## [1] "rbcL-SCA0009"
## 
## [[257]]
## [1] "rbcL-SCA0010"
## 
## [[258]]
## [1] "rbcL-SCA0013"
                  # Preview of the tokenising step: keep each row name of `out` up to the
                  # first "_S", then split that stem on "-" (one token vector per sample).
                  strsplit(
                    sapply(
                      strsplit(rownames(as.data.frame(out)), "_S"),
                      function(parts) parts[[1]]
                    ),
                    "-"
                  )
## [[1]]
## [1] "rbcL" "2020" "6"    "16"   "H1"  
## 
## [[2]]
## [1] "rbcL" "2020" "6"    "16"   "H5"  
## 
## [[3]]
## [1] "rbcL" "2020" "6"    "16"   "H6"  
## 
## [[4]]
## [1] "rbcL" "2020" "6"    "17"   "H2"  
## 
## [[5]]
## [1] "rbcL" "2020" "6"    "17"   "H4"  
## 
## [[6]]
## [1] "rbcL" "2020" "6"    "17"   "H8"  
## 
## [[7]]
## [1] "rbcL" "2020" "6"    "18"   "H3"  
## 
## [[8]]
## [1] "rbcL" "2020" "6"    "18"   "H7"  
## 
## [[9]]
## [1] "rbcL" "2020" "6"    "18"   "H9"  
## 
## [[10]]
## [1] "rbcL" "2020" "6"    "3"    "H1"  
## 
## [[11]]
## [1] "rbcL" "2020" "6"    "3"    "H5"  
## 
## [[12]]
## [1] "rbcL" "2020" "6"    "3"    "H6"  
## 
## [[13]]
## [1] "rbcL" "2020" "6"    "30"   "H1"  
## 
## [[14]]
## [1] "rbcL" "2020" "6"    "30"   "H5"  
## 
## [[15]]
## [1] "rbcL" "2020" "6"    "30"   "H6"  
## 
## [[16]]
## [1] "rbcL" "2020" "6"    "4"    "H2"  
## 
## [[17]]
## [1] "rbcL" "2020" "6"    "4"    "H4"  
## 
## [[18]]
## [1] "rbcL" "2020" "6"    "4"    "H8"  
## 
## [[19]]
## [1] "rbcL" "2020" "6"    "5"    "H3"  
## 
## [[20]]
## [1] "rbcL" "2020" "6"    "5"    "H7"  
## 
## [[21]]
## [1] "rbcL" "2020" "6"    "5"    "H9"  
## 
## [[22]]
## [1] "rbcL" "2020" "7"    "1"    "H2"  
## 
## [[23]]
## [1] "rbcL" "2020" "7"    "1"    "H4"  
## 
## [[24]]
## [1] "rbcL" "2020" "7"    "1"    "H8"  
## 
## [[25]]
## [1] "rbcL" "2020" "7"    "14"   "H1"  
## 
## [[26]]
## [1] "rbcL" "2020" "7"    "14"   "H5"  
## 
## [[27]]
## [1] "rbcL" "2020" "7"    "14"   "H6"  
## 
## [[28]]
## [1] "rbcL" "2020" "7"    "15"   "H4"  
## 
## [[29]]
## [1] "rbcL" "2020" "7"    "15"   "H8"  
## 
## [[30]]
## [1] "rbcL" "2020" "7"    "16"   "H3"  
## 
## [[31]]
## [1] "rbcL" "2020" "7"    "16"   "H7"  
## 
## [[32]]
## [1] "rbcL" "2020" "7"    "16"   "H9"  
## 
## [[33]]
## [1] "rbcL" "2020" "7"    "2"    "H3"  
## 
## [[34]]
## [1] "rbcL" "2020" "7"    "2"    "H7"  
## 
## [[35]]
## [1] "rbcL" "2020" "7"    "2"    "H9"  
## 
## [[36]]
## [1] "rbcL" "2021" "6"    "13"   "H1"  
## 
## [[37]]
## [1] "rbcL" "2021" "6"    "13"   "H3"  
## 
## [[38]]
## [1] "rbcL" "2021" "6"    "14"   "H11" 
## 
## [[39]]
## [1] "rbcL" "2021" "6"    "14"   "H6"  
## 
## [[40]]
## [1] "rbcL" "2021" "6"    "14"   "H7"  
## 
## [[41]]
## [1] "rbcL" "2021" "6"    "15"   "H8"  
## 
## [[42]]
## [1] "rbcL" "2021" "6"    "21"   "H10" 
## 
## [[43]]
## [1] "rbcL" "2021" "6"    "21"   "H12" 
## 
## [[44]]
## [1] "rbcL" "2021" "6"    "21"   "H9"  
## 
## [[45]]
## [1] "rbcL" "2021" "6"    "27"   "H21" 
## 
## [[46]]
## [1] "rbcL" "2021" "6"    "27"   "H22" 
## 
## [[47]]
## [1] "rbcL" "2021" "6"    "27"   "H27" 
## 
## [[48]]
## [1] "rbcL" "2021" "6"    "28"   "H25" 
## 
## [[49]]
## [1] "rbcL" "2021" "6"    "28"   "H26" 
## 
## [[50]]
## [1] "rbcL" "2021" "6"    "28"   "H28" 
## 
## [[51]]
## [1] "rbcL" "2021" "6"    "29"   "H17" 
## 
## [[52]]
## [1] "rbcL" "2021" "6"    "29"   "H23" 
## 
## [[53]]
## [1] "rbcL" "2021" "6"    "29"   "H24" 
## 
## [[54]]
## [1] "rbcL" "2021" "6"    "4"    "H21" 
## 
## [[55]]
## [1] "rbcL" "2021" "6"    "4"    "H22" 
## 
## [[56]]
## [1] "rbcL" "2021" "6"    "4"    "H27" 
## 
## [[57]]
## [1] "rbcL" "2021" "6"    "5"    "H18" 
## 
## [[58]]
## [1] "rbcL" "2021" "6"    "5"    "H25" 
## 
## [[59]]
## [1] "rbcL" "2021" "6"    "5"    "H26" 
## 
## [[60]]
## [1] "rbcL" "2021" "6"    "6"    "H17" 
## 
## [[61]]
## [1] "rbcL" "2021" "6"    "6"    "H24" 
## 
## [[62]]
## [1] "rbcL" "2021" "6"    "7"    "H23" 
## 
## [[63]]
## [1] "rbcL" "2021" "7"    "14"   "H10" 
## 
## [[64]]
## [1] "rbcL" "2021" "7"    "14"   "H12" 
## 
## [[65]]
## [1] "rbcL" "2021" "7"    "20"   "H27" 
## 
## [[66]]
## [1] "rbcL" "2021" "7"    "21"   "H25" 
## 
## [[67]]
## [1] "rbcL" "2021" "7"    "21"   "H26" 
## 
## [[68]]
## [1] "rbcL" "2021" "7"    "6"    "H11" 
## 
## [[69]]
## [1] "rbcL" "2021" "7"    "6"    "H6"  
## 
## [[70]]
## [1] "rbcL" "2021" "7"    "7"    "H4"  
## 
## [[71]]
## [1] "rbcL" "2021" "7"    "7"    "H8"  
## 
## [[72]]
## [1] "rbcL" "2021" "7"    "8"    "H3"  
## 
## [[73]]
## [1] "rbcL" "2023" "6"    "12"   "H3"  
## 
## [[74]]
## [1] "rbcL" "2023" "6"    "12"   "H5"  
## 
## [[75]]
## [1] "rbcL" "2023" "6"    "12"   "H7"  
## 
## [[76]]
## [1] "rbcL" "2023" "6"    "13"   "H6"  
## 
## [[77]]
## [1] "rbcL" "2023" "6"    "13"   "H8"  
## 
## [[78]]
## [1] "rbcL" "2023" "6"    "13"   "H9"  
## 
## [[79]]
## [1] "rbcL" "2023" "6"    "14"   "H3"  
## 
## [[80]]
## [1] "rbcL" "2023" "6"    "14"   "H7"  
## 
## [[81]]
## [1] "rbcL" "2023" "6"    "14"   "H9"  
## 
## [[82]]
## [1] "rbcL" "2023" "6"    "16"   "H5"  
## 
## [[83]]
## [1] "rbcL" "2023" "6"    "24"   "H6"  
## 
## [[84]]
## [1] "rbcL" "2023" "6"    "24"   "H8"  
## 
## [[85]]
## [1] "rbcL" "2023" "6"    "25"   "H2"  
## 
## [[86]]
## [1] "rbcL" "2023" "6"    "25"   "H4"  
## 
## [[87]]
## [1] "rbcL" "2023" "6"    "26"   "H1"  
## 
## [[88]]
## [1] "rbcL" "2023" "6"    "26"   "H7"  
## 
## [[89]]
## [1] "rbcL" "2023" "6"    "27"   "H3"  
## 
## [[90]]
## [1] "rbcL" "2023" "6"    "27"   "H5"  
## 
## [[91]]
## [1] "rbcL" "2023" "6"    "8"    "H1"  
## 
## [[92]]
## [1] "rbcL" "2023" "6"    "8"    "H2"  
## 
## [[93]]
## [1] "rbcL" "2023" "6"    "8"    "H4"  
## 
## [[94]]
## [1] "rbcL" "2023" "6"    "9"    "H2"  
## 
## [[95]]
## [1] "rbcL" "2023" "6"    "9"    "H4"  
## 
## [[96]]
## [1] "rbcL" "2023" "7"    "15"   "H6"  
## 
## [[97]]
## [1] "rbcL" "2023" "7"    "16"   "H4"  
## 
## [[98]]
## [1] "rbcL" "2023" "7"    "17"   "H1"  
## 
## [[99]]
## [1] "rbcL" "2023" "7"    "18"   "H3"  
## 
## [[100]]
## [1] "rbcL" "2023" "7"    "18"   "H7"  
## 
## [[101]]
## [1] "rbcL" "2023" "7"    "29"   "H5"  
## 
## [[102]]
## [1] "rbcL" "2023" "7"    "29"   "H7"  
## 
## [[103]]
## [1] "rbcL" "2023" "7"    "30"   "H8"  
## 
## [[104]]
## [1] "rbcL" "2023" "7"    "30"   "H9"  
## 
## [[105]]
## [1] "rbcL" "2023" "7"    "5"    "H1"  
## 
## [[106]]
## [1] "rbcL" "2023" "7"    "5"    "H2"  
## 
## [[107]]
## [1] "rbcL" "2023" "7"    "5"    "H4"  
## 
## [[108]]
## [1] "rbcL" "2023" "7"    "6"    "H6"  
## 
## [[109]]
## [1] "rbcL" "2023" "7"    "6"    "H8"  
## 
## [[110]]
## [1] "rbcL" "2023" "7"    "6"    "H9"  
## 
## [[111]]
## [1] "rbcL" "2023" "7"    "8"    "H3"  
## 
## [[112]]
## [1] "rbcL" "2023" "7"    "8"    "H5"  
## 
## [[113]]
## [1] "rbcL" "2023" "7"    "8"    "H7"  
## 
## [[114]]
## [1] "rbcL" "2023" "8"    "4"    "H2"  
## 
## [[115]]
## [1] "rbcL" "2023" "8"    "4"    "H5"  
## 
## [[116]]
## [1] "rbcL" "2023" "8"    "4"    "H6"  
## 
## [[117]]
## [1] "rbcL" "2023" "8"    "4"    "H7"  
## 
## [[118]]
## [1] "rbcL" "2023" "8"    "4"    "H8"  
## 
## [[119]]
## [1] "rbcL" "2023" "8"    "4"    "H9"  
## 
## [[120]]
## [1] "rbcL"  "Ba001"
## 
## [[121]]
## [1] "rbcL"  "Ba002"
## 
## [[122]]
## [1] "rbcL"  "Ba003"
## 
## [[123]]
## [1] "rbcL"  "Bb001"
## 
## [[124]]
## [1] "rbcL"  "Bb002"
## 
## [[125]]
## [1] "rbcL"  "Bb003"
## 
## [[126]]
## [1] "rbcL"  "Bb004"
## 
## [[127]]
## [1] "rbcL"  "Bb005"
## 
## [[128]]
## [1] "rbcL"  "Bb007"
## 
## [[129]]
## [1] "rbcL"  "Bb008"
## 
## [[130]]
## [1] "rbcL"  "Bb009"
## 
## [[131]]
## [1] "rbcL"  "Bb010"
## 
## [[132]]
## [1] "rbcL"  "Bb011"
## 
## [[133]]
## [1] "rbcL"  "Bb012"
## 
## [[134]]
## [1] "rbcL"  "Bb013"
## 
## [[135]]
## [1] "rbcL"  "Bb014"
## 
## [[136]]
## [1] "rbcL"  "Bb015"
## 
## [[137]]
## [1] "rbcL"  "Bb016"
## 
## [[138]]
## [1] "rbcL"  "Bb017"
## 
## [[139]]
## [1] "rbcL"  "Bb018"
## 
## [[140]]
## [1] "rbcL"  "Bb019"
## 
## [[141]]
## [1] "rbcL"  "Bb020"
## 
## [[142]]
## [1] "rbcL"  "Bb021"
## 
## [[143]]
## [1] "rbcL"  "Bb022"
## 
## [[144]]
## [1] "rbcL"  "Bb023"
## 
## [[145]]
## [1] "rbcL"  "Bb024"
## 
## [[146]]
## [1] "rbcL"  "Bb025"
## 
## [[147]]
## [1] "rbcL"  "Bf001"
## 
## [[148]]
## [1] "rbcL"  "Bf002"
## 
## [[149]]
## [1] "rbcL"  "Bf003"
## 
## [[150]]
## [1] "rbcL"  "Bf004"
## 
## [[151]]
## [1] "rbcL"  "Bg001"
## 
## [[152]]
## [1] "rbcL"  "Bg002"
## 
## [[153]]
## [1] "rbcL"  "Bg003"
## 
## [[154]]
## [1] "rbcL"  "Bg004"
## 
## [[155]]
## [1] "rbcL"  "Bg005"
## 
## [[156]]
## [1] "rbcL"  "Bg006"
## 
## [[157]]
## [1] "rbcL"  "Bg007"
## 
## [[158]]
## [1] "rbcL"  "Bg008"
## 
## [[159]]
## [1] "rbcL"  "Bg009"
## 
## [[160]]
## [1] "rbcL"  "Bg010"
## 
## [[161]]
## [1] "rbcL"  "Bg011"
## 
## [[162]]
## [1] "rbcL"  "Bg012"
## 
## [[163]]
## [1] "rbcL"  "Bg013"
## 
## [[164]]
## [1] "rbcL"  "Bg014"
## 
## [[165]]
## [1] "rbcL"  "Bg015"
## 
## [[166]]
## [1] "rbcL"  "Bg016"
## 
## [[167]]
## [1] "rbcL"  "Bg017"
## 
## [[168]]
## [1] "rbcL"  "Bg018"
## 
## [[169]]
## [1] "rbcL"  "Bg019"
## 
## [[170]]
## [1] "rbcL"  "Bi001"
## 
## [[171]]
## [1] "rbcL"  "Bi002"
## 
## [[172]]
## [1] "rbcL"  "Bi003"
## 
## [[173]]
## [1] "rbcL"  "Bi004"
## 
## [[174]]
## [1] "rbcL"  "Bi005"
## 
## [[175]]
## [1] "rbcL"  "Bi006"
## 
## [[176]]
## [1] "rbcL"  "Bi007"
## 
## [[177]]
## [1] "rbcL"    "CKC0001"
## 
## [[178]]
## [1] "rbcL"    "ESE0004"
## 
## [[179]]
## [1] "rbcL"     "ext"      "neg"      "ctrl"     "20230909"
## 
## [[180]]
## [1] "rbcL"     "ext"      "neg"      "ctrl"     "20230923"
## 
## [[181]]
## [1] "rbcL"     "ext"      "neg"      "ctrl"     "20230924"
## 
## [[182]]
## [1] "rbcL"     "ext"      "neg"      "ctrl"     "20231007"
## 
## [[183]]
## [1] "rbcL"     "ext"      "neg"      "ctrl"     "20231008"
## 
## [[184]]
## [1] "rbcL"     "ext"      "neg"      "ctrl"     "20231009"
## 
## [[185]]
## [1] "rbcL"     "ext"      "neg"      "ctrl"     "2024220A"
## 
## [[186]]
## [1] "rbcL"     "ext"      "neg"      "ctrl"     "2024220B"
## 
## [[187]]
## [1] "rbcL"     "ext"      "neg"      "ctrl"     "2024221A"
## 
## [[188]]
## [1] "rbcL"     "ext"      "neg"      "ctrl"     "2024221B"
## 
## [[189]]
## [1] "rbcL"     "ext"      "neg"      "ctrl"     "2024222A"
## 
## [[190]]
## [1] "rbcL"     "ext"      "neg"      "ctrl"     "2024222B"
## 
## [[191]]
## [1] "rbcL"     "ext"      "neg"      "ctrl"     "2024312A"
## 
## [[192]]
## [1] "rbcL"     "ext"      "neg"      "ctrl"     "2024312B"
## 
## [[193]]
## [1] "rbcL"     "ext"      "neg"      "ctrl"     "2024314A"
## 
## [[194]]
## [1] "rbcL"     "ext"      "neg"      "ctrl"     "2024314B"
## 
## [[195]]
## [1] "rbcL"    "ext"     "neg"     "ctrl"    "2024319"
## 
## [[196]]
## [1] "rbcL"    "ext"     "neg"     "ctrl"    "2024320"
## 
## [[197]]
## [1] "rbcL"    "KLS0007"
## 
## [[198]]
## [1] "rbcL"    "KLS0027"
## 
## [[199]]
## [1] "rbcL"    "KLS0044"
## 
## [[200]]
## [1] "rbcL"    "KLS0045"
## 
## [[201]]
## [1] "rbcL"    "KLS0052"
## 
## [[202]]
## [1] "rbcL"    "KLS0054"
## 
## [[203]]
## [1] "rbcL"    "KLS0055"
## 
## [[204]]
## [1] "rbcL"    "KLS0071"
## 
## [[205]]
## [1] "rbcL"    "KLS0095"
## 
## [[206]]
## [1] "rbcL"    "KLS0096"
## 
## [[207]]
## [1] "rbcL"    "KLS0105"
## 
## [[208]]
## [1] "rbcL"    "KLS0106"
## 
## [[209]]
## [1] "rbcL"    "KLS0119"
## 
## [[210]]
## [1] "rbcL"    "KLS0134"
## 
## [[211]]
## [1] "rbcL"    "KLS0135"
## 
## [[212]]
## [1] "rbcL"    "KLS0136"
## 
## [[213]]
## [1] "rbcL"    "KLS0137"
## 
## [[214]]
## [1] "rbcL"    "KLS0138"
## 
## [[215]]
## [1] "rbcL"    "KLS0139"
## 
## [[216]]
## [1] "rbcL"    "KLS0150"
## 
## [[217]]
## [1] "rbcL"    "KLS0153"
## 
## [[218]]
## [1] "rbcL"    "KLS0155"
## 
## [[219]]
## [1] "rbcL"    "KLS0156"
## 
## [[220]]
## [1] "rbcL"    "KLS0159"
## 
## [[221]]
## [1] "rbcL"    "KLS0163"
## 
## [[222]]
## [1] "rbcL"    "KLS0165"
## 
## [[223]]
## [1] "rbcL"    "KLS0167"
## 
## [[224]]
## [1] "rbcL"    "KLS0168"
## 
## [[225]]
## [1] "rbcL"    "KLS0169"
## 
## [[226]]
## [1] "rbcL"    "KLS0170"
## 
## [[227]]
## [1] "rbcL"    "KLS0200"
## 
## [[228]]
## [1] "rbcL"    "KLS0201"
## 
## [[229]]
## [1] "rbcL"    "KLS0205"
## 
## [[230]]
## [1] "rbcL"    "KLS0209"
## 
## [[231]]
## [1] "rbcL"    "KLS0221"
## 
## [[232]]
## [1] "rbcL"    "KLS0224"
## 
## [[233]]
## [1] "rbcL"    "KLS0225"
## 
## [[234]]
## [1] "rbcL"    "KLS0227"
## 
## [[235]]
## [1] "rbcL"    "KLS0241"
## 
## [[236]]
## [1] "rbcL"    "KLS0244"
## 
## [[237]]
## [1] "rbcL"    "KLS0246"
## 
## [[238]]
## [1] "rbcL"    "KLS0248"
## 
## [[239]]
## [1] "rbcL"    "KLS0253"
## 
## [[240]]
## [1] "rbcL"    "KLS0254"
## 
## [[241]]
## [1] "rbcL"    "KLS0256"
## 
## [[242]]
## [1] "rbcL"    "KLS0259"
## 
## [[243]]
## [1] "rbcL"    "KLS0263"
## 
## [[244]]
## [1] "rbcL"    "KLS0266"
## 
## [[245]]
## [1] "rbcL"    "KLS0272"
## 
## [[246]]
## [1] "rbcL"     "pcr"      "rbcL"     "neg"      "crtl"     "20240417"
## 
## [[247]]
## [1] "rbcL"     "pcr"      "rbcL"     "neg"      "ctrl"     "20240409"
## 
## [[248]]
## [1] "rbcL"      "pcr"       "rbcL"      "neg"       "ctrl"      "20240418A"
## 
## [[249]]
## [1] "rbcL"      "pcr"       "rbcL"      "neg"       "ctrl"      "20240418B"
## 
## [[250]]
## [1] "rbcL"     "pcr"      "rbcL"     "neg"      "ctrl"     "20240523"
## 
## [[251]]
## [1] "rbcL"     "pcr"      "rbcL"     "neg"      "ctrl"     "20240531"
## 
## [[252]]
## [1] "rbcL"     "pcr"      "rbcL"     "neg"      "ctrl"     "Greeshma" "20240416"
## 
## [[253]]
## [1] "rbcL"     "rbcL"     "pcr"      "neg"      "ctrl"     "20231021" "20231119"
## 
## [[254]]
## [1] "rbcL"     "rbcL"     "pcr"      "neg"      "ctrl"     "20231022" "20231120"
## 
## [[255]]
## [1] "rbcL"     "rbcL"     "pcr"      "neg"      "ctrl"     "20231023" "20231121"
## 
## [[256]]
## [1] "rbcL"    "SCA0009"
## 
## [[257]]
## [1] "rbcL"    "SCA0010"
## 
## [[258]]
## [1] "rbcL"    "SCA0013"
                  # Preview for the first sample only: the same token vector with its
                  # leading marker token removed ([-1]).
                  strsplit(
                    sapply(
                      strsplit(rownames(as.data.frame(out)), "_S"),
                      function(parts) parts[[1]]
                    ),
                    "-"
                  )[[1]][-1]
## [1] "2020" "6"    "16"   "H1"
            # Tokenise every row name of `out` (the filterAndTrim() summary): keep the
            # text before the first "_S" and split it on "-". The result is a list with
            # one character vector of tokens per sample, e.g.
            # c("rbcL", "2020", "6", "16", "H1").
            # vapply() is used instead of sapply() so the intermediate is always a
            # character vector: sapply() returns an empty list() when `out` has no
            # rows, which makes the outer strsplit() fail.
            temp <- strsplit(
              vapply(
                strsplit(rownames(as.data.frame(out)), "_S"),
                function(l) l[[1]],
                character(1)
              ),
              "-"
            )

# Build one short sample name per entry of `temp`: drop the first token (the
# marker prefix) and join the remaining tokens with "_",
# e.g. c("rbcL", "2020", "6", "16", "H1") -> "2020_6_16_H1".
# A single vapply() replaces the preallocate-and-fill loop over 1:length(...):
# that loop iterates over c(1, 0) when there are no samples and then fails on
# temp[[1]], whereas vapply() simply returns character(0).
sample.names <- vapply(
  temp,
  function(tokens) paste(tokens[-1], collapse = "_"),
  character(1)
)
# Sanity check: first/last sample names, then the number of names and the number
# of samples output from filterAndTrim (the two counts should match).
head(sample.names); tail(sample.names); length(sample.names); length(rownames(out))
## [1] "2020_6_16_H1" "2020_6_16_H5" "2020_6_16_H6" "2020_6_17_H2" "2020_6_17_H4"
## [6] "2020_6_17_H8"
## [1] "rbcL_pcr_neg_ctrl_20231021_20231119" "rbcL_pcr_neg_ctrl_20231022_20231120"
## [3] "rbcL_pcr_neg_ctrl_20231023_20231121" "SCA0009"                            
## [5] "SCA0010"                             "SCA0013"
## [1] 258
## [1] 258
# Label the rows of the filterAndTrim summary with the short sample names
# instead of the original file names.
rownames(out) <- sample.names

Not every sample made it through the filterAndTrim step

# Number of filtered forward-read paths that were requested: one per entry of
# cutFs, i.e. the length of "filtFs" as created in the chunk above (258).
length(file.path(path.cut, "filtered", basename(cutFs)))
## [1] 258
# Number of forward-read files actually written to the "filtered" directory
# (246): samples that did not make it through the filter have no file here.
length(list.files(file.path(path.cut, "filtered"), pattern = "L001_R1_001.fastq"))
## [1] 246

Updating path names (after samples drop out)

# Rebuild the filtered-file path vectors from what is actually on disk, since
# not all samples made it through the filter. list.files() returns bare file
# names by default, so they are joined onto the "filtered" directory directly.
filtFs <- file.path(
  path.cut, "filtered",
  list.files(file.path(path.cut, "filtered"), pattern = "L001_R1_001.fastq")
)
filtRs <- file.path(
  path.cut, "filtered",
  list.files(file.path(path.cut, "filtered"), pattern = "L001_R2_001.fastq")
)

Learn error and inspect quality of cutadapted & filtered reads

Learn the error rates

Learns the error rates from an input list, or vector, of file names or a list of derep-class objects. Error rate estimation is performed by errorEstimationFunction. The output of this function serves as input to the dada function call as the err parameter

This uses the reads from the filter and trimmed files located in the “filtered” folder /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered

# Learn one error model per read direction from the filtered fastq files
# (errF from the forward files, errR from the reverse files).
# You can safely ignore messages saying “Not all sequences were the same length.”
# NOTE(review): randomize is left at its default (FALSE, see the parameter notes
# below), so files are read in the order provided until nbases is reached. The
# log lines show that only 38 forward / 37 reverse samples were used; check that
# this leading subset is representative of the whole run -- TODO confirm.
errF <- learnErrors(filtFs, multithread = TRUE)
## 100662550 total bases in 366324 reads from 38 samples will be used for learning the error rates.
errR <- learnErrors(filtRs, multithread = TRUE)
## 100000500 total bases in 355673 reads from 37 samples will be used for learning the error rates.
# Explanation of parameters in the learnErrors() function:
# learnErrors(
#   fls,                   <-- fastq files
#   nbases = 1e+08,        <-- minimum number of total bases to learn error rate
#   nreads = NULL,         <-- deprecated, don't use
#   errorEstimationFunction = loessErrfun,
#   multithread = FALSE,   <-- if enabled, sets the number of threads
#   randomize = FALSE,     <-- If FALSE, samples are read in the provided order until enough reads are obtained. If TRUE, samples are picked at random from those provided
#   MAX_CONSIST = 10,      <-- The maximum number of times to step through the self-consistency loop.
#   OMEGA_C = 0,           <-- The threshold at which unique sequences inferred to contain errors are corrected in the final output, and used to estimate the error rates
#   qualityType = "Auto",  <-- The quality encoding of the fastq file(s). "Auto" (the default) means to attempt to auto-detect the encoding.
#   verbose = FALSE)

Plot errors

We expect a roughly linear decrease in Log transformed error frequency as the consensus quality score increases from 0 to 40

# Plot the learned forward-read error model. Per the dada2 documentation,
# nominalQ = TRUE also draws the error rates expected from the nominal
# definition of the quality score, for comparison with the fitted rates.
# NOTE(review): the scale_y_log10() warnings below presumably come from
# zero-valued points on the log axis; the plot is still produced.
plotErrors(errF, nominalQ = TRUE) #forward
## Warning in scale_y_log10(): log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.

# Same plot for the reverse-read error model.
plotErrors(errR, nominalQ = TRUE) #reverse
## Warning in scale_y_log10(): log-10 transformation introduced infinite values.

Inspect read quality profiles

The quality profile plot is a gray-scale heatmap of the frequency of each quality score at each base position. The median quality score at each position is shown by the green line, and the quartiles of the quality score distribution by the orange lines. The red line shows the scaled proportion of reads that extend to at least that position.

# Quality profile of the first filtered forward-read file only (filtFs[1]).
plotQualityProfile(filtFs[1]) #inspect first sample's forward reads

# Quality profile of the first filtered reverse-read file (filtRs[1]).
plotQualityProfile(filtRs[1]) #reverse reads are expected to look worse than forward reads

Dereplication & denoising of identical sequences

Dereplication combines all identical sequencing reads into “unique sequences” with a corresponding “abundance” (the number of reads with that same sequence). Dereplication substantially reduces computation time by eliminating redundant comparisons.

DADA2 retains a summary of the quality information associated with each unique sequence. The consensus quality profile of a unique sequence is the average of the positional qualities from the dereplicated reads. These quality profiles inform the error model of the subsequent denoising step, significantly increasing DADA2’s accuracy. Note that we ran the learnErrors step before dereplication; this is fine, because dada is the denoising step and it uses the error model that was created earlier.

This step uses the reads from the filtered and trimmed files located in the “filtered” folder: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered

Dereplicate reads

# Dereplicate the filtered forward (R1) fastq files listed in filtFs and store
# the result in derepFs. verbose = TRUE prints, for every file, the number of
# unique sequences and the total number of reads (the log echoed below).
# NOTE(review): the log shows several samples with only 1-4 reads after
# filtering (e.g. 2020-7-1-H4, 2020-7-14-H5, 2020-7-14-H6, 2020-7-16-H9,
# 2021-7-14-H12, 2021-7-21-H26, Bf002); consider whether these should be
# dropped before denoising/merging -- confirm against downstream steps.
derepFs <- derepFastq(filtFs, verbose = TRUE)
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-16-H1_S293_L001_R1_001.fastq
## Encountered 1458 unique sequences from 9241 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-16-H5_S294_L001_R1_001.fastq
## Encountered 1990 unique sequences from 5929 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-16-H6_S295_L001_R1_001.fastq
## Encountered 242 unique sequences from 850 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-17-H2_S296_L001_R1_001.fastq
## Encountered 1916 unique sequences from 4559 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-17-H4_S297_L001_R1_001.fastq
## Encountered 580 unique sequences from 2027 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-17-H8_S298_L001_R1_001.fastq
## Encountered 2426 unique sequences from 15437 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-18-H3_S299_L001_R1_001.fastq
## Encountered 1391 unique sequences from 4021 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-18-H7_S300_L001_R1_001.fastq
## Encountered 1622 unique sequences from 8636 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-18-H9_S301_L001_R1_001.fastq
## Encountered 959 unique sequences from 4778 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-3-H1_S302_L001_R1_001.fastq
## Encountered 3209 unique sequences from 15258 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-3-H5_S303_L001_R1_001.fastq
## Encountered 4372 unique sequences from 17366 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-3-H6_S304_L001_R1_001.fastq
## Encountered 3275 unique sequences from 17405 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-30-H1_S305_L001_R1_001.fastq
## Encountered 2994 unique sequences from 12116 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-30-H5_S306_L001_R1_001.fastq
## Encountered 2674 unique sequences from 12313 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-30-H6_S307_L001_R1_001.fastq
## Encountered 2500 unique sequences from 12755 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-4-H2_S308_L001_R1_001.fastq
## Encountered 355 unique sequences from 851 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-4-H4_S309_L001_R1_001.fastq
## Encountered 166 unique sequences from 350 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-4-H8_S310_L001_R1_001.fastq
## Encountered 2544 unique sequences from 8162 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-5-H3_S311_L001_R1_001.fastq
## Encountered 3350 unique sequences from 16183 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-5-H7_S312_L001_R1_001.fastq
## Encountered 2798 unique sequences from 13260 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-5-H9_S313_L001_R1_001.fastq
## Encountered 2281 unique sequences from 12153 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-1-H2_S314_L001_R1_001.fastq
## Encountered 3378 unique sequences from 13295 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-1-H4_S315_L001_R1_001.fastq
## Encountered 2 unique sequences from 2 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-1-H8_S316_L001_R1_001.fastq
## Encountered 2738 unique sequences from 13616 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-14-H5_S318_L001_R1_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-14-H6_S319_L001_R1_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-15-H8_S322_L001_R1_001.fastq
## Encountered 1485 unique sequences from 8785 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-16-H3_S323_L001_R1_001.fastq
## Encountered 5234 unique sequences from 36464 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-16-H7_S324_L001_R1_001.fastq
## Encountered 1441 unique sequences from 8836 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-16-H9_S325_L001_R1_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-2-H3_S326_L001_R1_001.fastq
## Encountered 1382 unique sequences from 7246 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-2-H7_S327_L001_R1_001.fastq
## Encountered 2149 unique sequences from 9836 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-2-H9_S328_L001_R1_001.fastq
## Encountered 2901 unique sequences from 14023 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-13-H1_S329_L001_R1_001.fastq
## Encountered 2254 unique sequences from 14260 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-13-H3_S330_L001_R1_001.fastq
## Encountered 2637 unique sequences from 11753 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-14-H11_S331_L001_R1_001.fastq
## Encountered 1857 unique sequences from 12176 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-14-H6_S332_L001_R1_001.fastq
## Encountered 2604 unique sequences from 11728 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-14-H7_S333_L001_R1_001.fastq
## Encountered 1602 unique sequences from 10651 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-15-H8_S334_L001_R1_001.fastq
## Encountered 2309 unique sequences from 12915 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-21-H10_S335_L001_R1_001.fastq
## Encountered 2961 unique sequences from 16187 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-21-H12_S336_L001_R1_001.fastq
## Encountered 2412 unique sequences from 17032 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-21-H9_S337_L001_R1_001.fastq
## Encountered 2377 unique sequences from 13821 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-27-H21_S338_L001_R1_001.fastq
## Encountered 2963 unique sequences from 15729 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-27-H22_S339_L001_R1_001.fastq
## Encountered 1811 unique sequences from 19232 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-27-H27_S340_L001_R1_001.fastq
## Encountered 3128 unique sequences from 12550 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-28-H25_S341_L001_R1_001.fastq
## Encountered 1877 unique sequences from 10169 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-28-H26_S342_L001_R1_001.fastq
## Encountered 1527 unique sequences from 6196 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-28-H28_S343_L001_R1_001.fastq
## Encountered 2508 unique sequences from 10332 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-29-H17_S344_L001_R1_001.fastq
## Encountered 1528 unique sequences from 9104 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-29-H23_S345_L001_R1_001.fastq
## Encountered 2284 unique sequences from 11907 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-29-H24_S346_L001_R1_001.fastq
## Encountered 191 unique sequences from 715 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-4-H21_S347_L001_R1_001.fastq
## Encountered 2264 unique sequences from 11218 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-4-H22_S348_L001_R1_001.fastq
## Encountered 1183 unique sequences from 4717 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-4-H27_S349_L001_R1_001.fastq
## Encountered 1069 unique sequences from 4547 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-5-H18_S350_L001_R1_001.fastq
## Encountered 440 unique sequences from 1152 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-5-H25_S351_L001_R1_001.fastq
## Encountered 1090 unique sequences from 2834 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-5-H26_S352_L001_R1_001.fastq
## Encountered 1094 unique sequences from 3400 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-6-H17_S353_L001_R1_001.fastq
## Encountered 1689 unique sequences from 7362 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-6-H24_S354_L001_R1_001.fastq
## Encountered 2915 unique sequences from 11939 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-7-H23_S355_L001_R1_001.fastq
## Encountered 2123 unique sequences from 10457 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-14-H10_S356_L001_R1_001.fastq
## Encountered 2658 unique sequences from 22653 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-14-H12_S357_L001_R1_001.fastq
## Encountered 4 unique sequences from 4 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-20-H27_S358_L001_R1_001.fastq
## Encountered 1642 unique sequences from 9282 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-21-H25_S359_L001_R1_001.fastq
## Encountered 1689 unique sequences from 12630 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-21-H26_S360_L001_R1_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-6-H11_S362_L001_R1_001.fastq
## Encountered 2779 unique sequences from 13905 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-6-H6_S364_L001_R1_001.fastq
## Encountered 3232 unique sequences from 13693 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-7-H8_S366_L001_R1_001.fastq
## Encountered 1829 unique sequences from 15342 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-8-H3_S367_L001_R1_001.fastq
## Encountered 1860 unique sequences from 11288 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-12-H3_S368_L001_R1_001.fastq
## Encountered 1177 unique sequences from 6412 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-12-H5_S369_L001_R1_001.fastq
## Encountered 2436 unique sequences from 14325 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-12-H7_S370_L001_R1_001.fastq
## Encountered 1678 unique sequences from 8964 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-13-H6_S371_L001_R1_001.fastq
## Encountered 2391 unique sequences from 12999 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-13-H8_S372_L001_R1_001.fastq
## Encountered 1918 unique sequences from 8254 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-13-H9_S373_L001_R1_001.fastq
## Encountered 921 unique sequences from 3377 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-14-H3_S374_L001_R1_001.fastq
## Encountered 2373 unique sequences from 15274 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-14-H7_S375_L001_R1_001.fastq
## Encountered 2981 unique sequences from 14310 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-14-H9_S376_L001_R1_001.fastq
## Encountered 2371 unique sequences from 12646 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-16-H5_S377_L001_R1_001.fastq
## Encountered 2142 unique sequences from 11967 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-24-H6_S378_L001_R1_001.fastq
## Encountered 2587 unique sequences from 8763 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-24-H8_S379_L001_R1_001.fastq
## Encountered 2773 unique sequences from 9782 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-25-H2_S380_L001_R1_001.fastq
## Encountered 3801 unique sequences from 13012 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-25-H4_S381_L001_R1_001.fastq
## Encountered 2176 unique sequences from 5350 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-26-H1_S382_L001_R1_001.fastq
## Encountered 2165 unique sequences from 5962 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-26-H7_S383_L001_R1_001.fastq
## Encountered 3472 unique sequences from 10330 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-27-H3_S384_L001_R1_001.fastq
## Encountered 1942 unique sequences from 6278 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-27-H5_S385_L001_R1_001.fastq
## Encountered 3675 unique sequences from 11398 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-8-H1_S386_L001_R1_001.fastq
## Encountered 1906 unique sequences from 9094 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-8-H2_S387_L001_R1_001.fastq
## Encountered 1706 unique sequences from 10848 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-8-H4_S388_L001_R1_001.fastq
## Encountered 2437 unique sequences from 11980 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-9-H2_S389_L001_R1_001.fastq
## Encountered 5129 unique sequences from 23277 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-9-H4_S390_L001_R1_001.fastq
## Encountered 2379 unique sequences from 7104 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-15-H6_S391_L001_R1_001.fastq
## Encountered 3223 unique sequences from 18724 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-16-H4_S392_L001_R1_001.fastq
## Encountered 2230 unique sequences from 13047 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-17-H1_S393_L001_R1_001.fastq
## Encountered 4109 unique sequences from 21557 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-18-H3_S394_L001_R1_001.fastq
## Encountered 2315 unique sequences from 7624 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-18-H7_S395_L001_R1_001.fastq
## Encountered 1583 unique sequences from 8641 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-29-H5_S396_L001_R1_001.fastq
## Encountered 1829 unique sequences from 8843 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-29-H7_S397_L001_R1_001.fastq
## Encountered 3136 unique sequences from 19278 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-30-H8_S398_L001_R1_001.fastq
## Encountered 1142 unique sequences from 8704 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-30-H9_S399_L001_R1_001.fastq
## Encountered 1078 unique sequences from 4403 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-5-H1_S400_L001_R1_001.fastq
## Encountered 1269 unique sequences from 6719 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-5-H2_S401_L001_R1_001.fastq
## Encountered 1042 unique sequences from 4171 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-6-H6_S403_L001_R1_001.fastq
## Encountered 2242 unique sequences from 11259 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-6-H8_S404_L001_R1_001.fastq
## Encountered 3890 unique sequences from 19324 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-6-H9_S405_L001_R1_001.fastq
## Encountered 4267 unique sequences from 22375 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-8-H3_S406_L001_R1_001.fastq
## Encountered 1435 unique sequences from 7103 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-8-H5_S407_L001_R1_001.fastq
## Encountered 1913 unique sequences from 12450 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-8-H7_S408_L001_R1_001.fastq
## Encountered 3427 unique sequences from 22286 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H2_S409_L001_R1_001.fastq
## Encountered 2576 unique sequences from 15351 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H5_S410_L001_R1_001.fastq
## Encountered 3942 unique sequences from 13489 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H6_S411_L001_R1_001.fastq
## Encountered 2186 unique sequences from 11447 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H7_S412_L001_R1_001.fastq
## Encountered 5257 unique sequences from 21306 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H8_S413_L001_R1_001.fastq
## Encountered 2240 unique sequences from 11628 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H9_S414_L001_R1_001.fastq
## Encountered 232 unique sequences from 344 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Ba001_S415_L001_R1_001.fastq
## Encountered 1936 unique sequences from 11589 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Ba002_S416_L001_R1_001.fastq
## Encountered 2555 unique sequences from 8068 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Ba003_S417_L001_R1_001.fastq
## Encountered 1150 unique sequences from 7634 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb001_S418_L001_R1_001.fastq
## Encountered 2073 unique sequences from 13734 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb002_S419_L001_R1_001.fastq
## Encountered 2733 unique sequences from 19049 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb003_S420_L001_R1_001.fastq
## Encountered 4817 unique sequences from 25904 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb004_S421_L001_R1_001.fastq
## Encountered 4831 unique sequences from 23070 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb005_S422_L001_R1_001.fastq
## Encountered 2293 unique sequences from 11519 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb007_S423_L001_R1_001.fastq
## Encountered 2107 unique sequences from 12125 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb008_S424_L001_R1_001.fastq
## Encountered 1172 unique sequences from 6567 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb009_S425_L001_R1_001.fastq
## Encountered 1672 unique sequences from 8599 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb010_S426_L001_R1_001.fastq
## Encountered 1976 unique sequences from 10777 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb011_S427_L001_R1_001.fastq
## Encountered 2442 unique sequences from 8051 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb012_S428_L001_R1_001.fastq
## Encountered 4932 unique sequences from 27231 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb013_S429_L001_R1_001.fastq
## Encountered 1573 unique sequences from 6172 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb014_S430_L001_R1_001.fastq
## Encountered 2922 unique sequences from 15434 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb015_S431_L001_R1_001.fastq
## Encountered 1641 unique sequences from 7061 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb016_S432_L001_R1_001.fastq
## Encountered 1258 unique sequences from 6078 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb017_S433_L001_R1_001.fastq
## Encountered 1724 unique sequences from 7797 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb018_S434_L001_R1_001.fastq
## Encountered 3752 unique sequences from 16225 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb019_S435_L001_R1_001.fastq
## Encountered 2852 unique sequences from 19040 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb020_S436_L001_R1_001.fastq
## Encountered 2300 unique sequences from 20153 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb021_S437_L001_R1_001.fastq
## Encountered 1942 unique sequences from 6625 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb022_S438_L001_R1_001.fastq
## Encountered 2128 unique sequences from 15270 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb023_S439_L001_R1_001.fastq
## Encountered 1651 unique sequences from 8291 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb024_S440_L001_R1_001.fastq
## Encountered 2620 unique sequences from 15599 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb025_S441_L001_R1_001.fastq
## Encountered 1501 unique sequences from 9402 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bf001_S442_L001_R1_001.fastq
## Encountered 3433 unique sequences from 18214 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bf002_S443_L001_R1_001.fastq
## Encountered 3 unique sequences from 4 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bf003_S444_L001_R1_001.fastq
## Encountered 1932 unique sequences from 10825 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bf004_S445_L001_R1_001.fastq
## Encountered 1389 unique sequences from 5494 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg001_S446_L001_R1_001.fastq
## Encountered 1199 unique sequences from 6774 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg002_S447_L001_R1_001.fastq
## Encountered 2431 unique sequences from 9593 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg003_S448_L001_R1_001.fastq
## Encountered 2390 unique sequences from 9878 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg004_S449_L001_R1_001.fastq
## Encountered 1476 unique sequences from 8208 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg005_S450_L001_R1_001.fastq
## Encountered 3025 unique sequences from 11754 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg006_S451_L001_R1_001.fastq
## Encountered 2148 unique sequences from 14258 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg007_S452_L001_R1_001.fastq
## Encountered 1728 unique sequences from 13163 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg008_S453_L001_R1_001.fastq
## Encountered 2815 unique sequences from 12164 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg009_S454_L001_R1_001.fastq
## Encountered 2005 unique sequences from 5238 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg010_S455_L001_R1_001.fastq
## Encountered 2092 unique sequences from 6285 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg011_S456_L001_R1_001.fastq
## Encountered 2397 unique sequences from 10184 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg012_S457_L001_R1_001.fastq
## Encountered 1236 unique sequences from 7157 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg013_S458_L001_R1_001.fastq
## Encountered 1739 unique sequences from 12665 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg014_S459_L001_R1_001.fastq
## Encountered 1893 unique sequences from 12895 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg015_S460_L001_R1_001.fastq
## Encountered 1793 unique sequences from 6987 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg016_S461_L001_R1_001.fastq
## Encountered 1522 unique sequences from 5489 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg017_S462_L001_R1_001.fastq
## Encountered 1124 unique sequences from 5605 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg018_S463_L001_R1_001.fastq
## Encountered 1468 unique sequences from 6740 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg019_S464_L001_R1_001.fastq
## Encountered 1534 unique sequences from 8707 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi001_S465_L001_R1_001.fastq
## Encountered 1691 unique sequences from 10410 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi002_S466_L001_R1_001.fastq
## Encountered 2813 unique sequences from 13359 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi003_S467_L001_R1_001.fastq
## Encountered 4425 unique sequences from 20553 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi004_S468_L001_R1_001.fastq
## Encountered 3192 unique sequences from 18623 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi005_S469_L001_R1_001.fastq
## Encountered 36 unique sequences from 149 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi006_S470_L001_R1_001.fastq
## Encountered 1268 unique sequences from 4554 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi007_S471_L001_R1_001.fastq
## Encountered 2803 unique sequences from 9976 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-CKC0001_S472_L001_R1_001.fastq
## Encountered 1046 unique sequences from 7833 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ESE0004_S473_L001_R1_001.fastq
## Encountered 1588 unique sequences from 9116 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-20230909_S474_L001_R1_001.fastq
## Encountered 2 unique sequences from 3 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-20231007_S477_L001_R1_001.fastq
## Encountered 3 unique sequences from 3 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-20231008_S478_L001_R1_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024220A_S480_L001_R1_001.fastq
## Encountered 26 unique sequences from 37 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024220B_S481_L001_R1_001.fastq
## Encountered 518 unique sequences from 2165 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024221A_S482_L001_R1_001.fastq
## Encountered 686 unique sequences from 2878 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024221B_S483_L001_R1_001.fastq
## Encountered 151 unique sequences from 622 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024222A_S484_L001_R1_001.fastq
## Encountered 198 unique sequences from 823 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024222B_S485_L001_R1_001.fastq
## Encountered 1232 unique sequences from 6313 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024312A_S486_L001_R1_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024312B_S487_L001_R1_001.fastq
## Encountered 6 unique sequences from 6 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024314A_S488_L001_R1_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024314B_S489_L001_R1_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024319_S490_L001_R1_001.fastq
## Encountered 43 unique sequences from 180 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0007_S492_L001_R1_001.fastq
## Encountered 2322 unique sequences from 8407 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0027_S494_L001_R1_001.fastq
## Encountered 1596 unique sequences from 7425 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0044_S495_L001_R1_001.fastq
## Encountered 286 unique sequences from 941 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0045_S496_L001_R1_001.fastq
## Encountered 2717 unique sequences from 16853 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0052_S497_L001_R1_001.fastq
## Encountered 254 unique sequences from 1057 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0054_S498_L001_R1_001.fastq
## Encountered 179 unique sequences from 991 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0055_S499_L001_R1_001.fastq
## Encountered 233 unique sequences from 1345 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0071_S500_L001_R1_001.fastq
## Encountered 1254 unique sequences from 10060 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0095_S501_L001_R1_001.fastq
## Encountered 2376 unique sequences from 18665 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0096_S502_L001_R1_001.fastq
## Encountered 1976 unique sequences from 11780 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0105_S503_L001_R1_001.fastq
## Encountered 2328 unique sequences from 20059 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0106_S504_L001_R1_001.fastq
## Encountered 2956 unique sequences from 13590 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0119_S505_L001_R1_001.fastq
## Encountered 1553 unique sequences from 10904 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0134_S506_L001_R1_001.fastq
## Encountered 1712 unique sequences from 10331 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0135_S507_L001_R1_001.fastq
## Encountered 452 unique sequences from 1762 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0136_S508_L001_R1_001.fastq
## Encountered 2012 unique sequences from 10072 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0137_S509_L001_R1_001.fastq
## Encountered 4607 unique sequences from 21450 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0138_S510_L001_R1_001.fastq
## Encountered 1797 unique sequences from 10763 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0139_S511_L001_R1_001.fastq
## Encountered 2508 unique sequences from 10490 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0150_S512_L001_R1_001.fastq
## Encountered 2725 unique sequences from 13943 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0153_S513_L001_R1_001.fastq
## Encountered 1076 unique sequences from 3156 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0155_S514_L001_R1_001.fastq
## Encountered 2738 unique sequences from 15382 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0156_S515_L001_R1_001.fastq
## Encountered 4653 unique sequences from 27268 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0159_S516_L001_R1_001.fastq
## Encountered 3951 unique sequences from 19126 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0163_S517_L001_R1_001.fastq
## Encountered 2418 unique sequences from 8259 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0165_S518_L001_R1_001.fastq
## Encountered 2021 unique sequences from 7382 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0167_S519_L001_R1_001.fastq
## Encountered 2582 unique sequences from 12356 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0168_S520_L001_R1_001.fastq
## Encountered 1500 unique sequences from 9986 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0169_S521_L001_R1_001.fastq
## Encountered 4242 unique sequences from 18064 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0170_S522_L001_R1_001.fastq
## Encountered 2102 unique sequences from 9654 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0200_S523_L001_R1_001.fastq
## Encountered 1968 unique sequences from 14566 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0201_S524_L001_R1_001.fastq
## Encountered 4120 unique sequences from 23716 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0205_S525_L001_R1_001.fastq
## Encountered 3134 unique sequences from 12367 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0209_S526_L001_R1_001.fastq
## Encountered 685 unique sequences from 2911 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0221_S527_L001_R1_001.fastq
## Encountered 2247 unique sequences from 8200 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0224_S528_L001_R1_001.fastq
## Encountered 1547 unique sequences from 11029 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0225_S529_L001_R1_001.fastq
## Encountered 3266 unique sequences from 15454 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0227_S530_L001_R1_001.fastq
## Encountered 1554 unique sequences from 9704 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0241_S531_L001_R1_001.fastq
## Encountered 2935 unique sequences from 13922 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0244_S532_L001_R1_001.fastq
## Encountered 3025 unique sequences from 12161 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0246_S533_L001_R1_001.fastq
## Encountered 1672 unique sequences from 5407 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0248_S534_L001_R1_001.fastq
## Encountered 4225 unique sequences from 23791 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0253_S535_L001_R1_001.fastq
## Encountered 2633 unique sequences from 14833 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0254_S536_L001_R1_001.fastq
## Encountered 2234 unique sequences from 9069 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0256_S493_L001_R1_001.fastq
## Encountered 5437 unique sequences from 26403 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0259_S537_L001_R1_001.fastq
## Encountered 2285 unique sequences from 14649 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0263_S538_L001_R1_001.fastq
## Encountered 413 unique sequences from 1481 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0266_S539_L001_R1_001.fastq
## Encountered 294 unique sequences from 896 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0272_S540_L001_R1_001.fastq
## Encountered 506 unique sequences from 977 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-crtl-20240417_S541_L001_R1_001.fastq
## Encountered 599 unique sequences from 2444 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-ctrl-20240409_S542_L001_R1_001.fastq
## Encountered 111 unique sequences from 462 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-ctrl-20240418A_S543_L001_R1_001.fastq
## Encountered 2234 unique sequences from 8900 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-ctrl-20240418B_S544_L001_R1_001.fastq
## Encountered 21 unique sequences from 43 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-ctrl-20240523_S545_L001_R1_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-rbcL-pcr-neg-ctrl-20231021-20231119_S548_L001_R1_001.fastq
## Encountered 3 unique sequences from 3 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-SCA0009_S551_L001_R1_001.fastq
## Encountered 3186 unique sequences from 18374 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-SCA0010_S552_L001_R1_001.fastq
## Encountered 2303 unique sequences from 13370 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-SCA0013_S553_L001_R1_001.fastq
## Encountered 3542 unique sequences from 18429 total sequences read.
# Dereplicate the filtered reverse (R2) reads: each FASTQ listed in `filtRs`
# is read and reduced to its set of unique sequences. `verbose = TRUE` prints
# one "Encountered <unique> unique sequences from <total> total sequences
# read." line per file, which serves as a quick per-sample depth check.
# NOTE(review): several samples in the output below retain only 1-2 reads
# after filtering; confirm whether these should be dropped before denoising.
derepRs <- derepFastq(
  fls = filtRs,
  verbose = TRUE
)
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-16-H1_S293_L001_R2_001.fastq
## Encountered 3205 unique sequences from 9241 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-16-H5_S294_L001_R2_001.fastq
## Encountered 3022 unique sequences from 5929 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-16-H6_S295_L001_R2_001.fastq
## Encountered 397 unique sequences from 850 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-17-H2_S296_L001_R2_001.fastq
## Encountered 2719 unique sequences from 4559 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-17-H4_S297_L001_R2_001.fastq
## Encountered 930 unique sequences from 2027 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-17-H8_S298_L001_R2_001.fastq
## Encountered 4895 unique sequences from 15437 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-18-H3_S299_L001_R2_001.fastq
## Encountered 2167 unique sequences from 4021 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-18-H7_S300_L001_R2_001.fastq
## Encountered 3009 unique sequences from 8636 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-18-H9_S301_L001_R2_001.fastq
## Encountered 1876 unique sequences from 4778 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-3-H1_S302_L001_R2_001.fastq
## Encountered 5820 unique sequences from 15258 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-3-H5_S303_L001_R2_001.fastq
## Encountered 7596 unique sequences from 17366 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-3-H6_S304_L001_R2_001.fastq
## Encountered 6427 unique sequences from 17405 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-30-H1_S305_L001_R2_001.fastq
## Encountered 4939 unique sequences from 12116 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-30-H5_S306_L001_R2_001.fastq
## Encountered 4950 unique sequences from 12313 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-30-H6_S307_L001_R2_001.fastq
## Encountered 4654 unique sequences from 12755 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-4-H2_S308_L001_R2_001.fastq
## Encountered 517 unique sequences from 851 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-4-H4_S309_L001_R2_001.fastq
## Encountered 234 unique sequences from 350 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-4-H8_S310_L001_R2_001.fastq
## Encountered 4123 unique sequences from 8162 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-5-H3_S311_L001_R2_001.fastq
## Encountered 5848 unique sequences from 16183 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-5-H7_S312_L001_R2_001.fastq
## Encountered 5067 unique sequences from 13260 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-6-5-H9_S313_L001_R2_001.fastq
## Encountered 4305 unique sequences from 12153 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-1-H2_S314_L001_R2_001.fastq
## Encountered 5534 unique sequences from 13295 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-1-H4_S315_L001_R2_001.fastq
## Encountered 2 unique sequences from 2 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-1-H8_S316_L001_R2_001.fastq
## Encountered 4682 unique sequences from 13616 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-14-H5_S318_L001_R2_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-14-H6_S319_L001_R2_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-15-H8_S322_L001_R2_001.fastq
## Encountered 3038 unique sequences from 8785 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-16-H3_S323_L001_R2_001.fastq
## Encountered 11402 unique sequences from 36464 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-16-H7_S324_L001_R2_001.fastq
## Encountered 2804 unique sequences from 8836 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-16-H9_S325_L001_R2_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-2-H3_S326_L001_R2_001.fastq
## Encountered 2495 unique sequences from 7246 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-2-H7_S327_L001_R2_001.fastq
## Encountered 4020 unique sequences from 9836 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2020-7-2-H9_S328_L001_R2_001.fastq
## Encountered 5076 unique sequences from 14023 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-13-H1_S329_L001_R2_001.fastq
## Encountered 4494 unique sequences from 14260 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-13-H3_S330_L001_R2_001.fastq
## Encountered 4657 unique sequences from 11753 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-14-H11_S331_L001_R2_001.fastq
## Encountered 3597 unique sequences from 12176 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-14-H6_S332_L001_R2_001.fastq
## Encountered 4682 unique sequences from 11728 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-14-H7_S333_L001_R2_001.fastq
## Encountered 3150 unique sequences from 10651 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-15-H8_S334_L001_R2_001.fastq
## Encountered 4269 unique sequences from 12915 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-21-H10_S335_L001_R2_001.fastq
## Encountered 5422 unique sequences from 16187 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-21-H12_S336_L001_R2_001.fastq
## Encountered 5073 unique sequences from 17032 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-21-H9_S337_L001_R2_001.fastq
## Encountered 4537 unique sequences from 13821 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-27-H21_S338_L001_R2_001.fastq
## Encountered 5712 unique sequences from 15729 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-27-H22_S339_L001_R2_001.fastq
## Encountered 4554 unique sequences from 19232 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-27-H27_S340_L001_R2_001.fastq
## Encountered 5336 unique sequences from 12550 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-28-H25_S341_L001_R2_001.fastq
## Encountered 3726 unique sequences from 10169 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-28-H26_S342_L001_R2_001.fastq
## Encountered 2640 unique sequences from 6196 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-28-H28_S343_L001_R2_001.fastq
## Encountered 4159 unique sequences from 10332 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-29-H17_S344_L001_R2_001.fastq
## Encountered 3139 unique sequences from 9104 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-29-H23_S345_L001_R2_001.fastq
## Encountered 4338 unique sequences from 11907 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-29-H24_S346_L001_R2_001.fastq
## Encountered 329 unique sequences from 715 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-4-H21_S347_L001_R2_001.fastq
## Encountered 4133 unique sequences from 11218 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-4-H22_S348_L001_R2_001.fastq
## Encountered 2153 unique sequences from 4717 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-4-H27_S349_L001_R2_001.fastq
## Encountered 1819 unique sequences from 4547 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-5-H18_S350_L001_R2_001.fastq
## Encountered 674 unique sequences from 1152 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-5-H25_S351_L001_R2_001.fastq
## Encountered 1686 unique sequences from 2834 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-5-H26_S352_L001_R2_001.fastq
## Encountered 1664 unique sequences from 3400 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-6-H17_S353_L001_R2_001.fastq
## Encountered 3037 unique sequences from 7362 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-6-H24_S354_L001_R2_001.fastq
## Encountered 4853 unique sequences from 11939 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-6-7-H23_S355_L001_R2_001.fastq
## Encountered 3897 unique sequences from 10457 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-14-H10_S356_L001_R2_001.fastq
## Encountered 5895 unique sequences from 22653 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-14-H12_S357_L001_R2_001.fastq
## Encountered 4 unique sequences from 4 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-20-H27_S358_L001_R2_001.fastq
## Encountered 3372 unique sequences from 9282 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-21-H25_S359_L001_R2_001.fastq
## Encountered 3592 unique sequences from 12630 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-21-H26_S360_L001_R2_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-6-H11_S362_L001_R2_001.fastq
## Encountered 4992 unique sequences from 13905 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-6-H6_S364_L001_R2_001.fastq
## Encountered 5478 unique sequences from 13693 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-7-H8_S366_L001_R2_001.fastq
## Encountered 4123 unique sequences from 15342 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2021-7-8-H3_S367_L001_R2_001.fastq
## Encountered 3581 unique sequences from 11288 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-12-H3_S368_L001_R2_001.fastq
## Encountered 2311 unique sequences from 6412 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-12-H5_S369_L001_R2_001.fastq
## Encountered 4893 unique sequences from 14325 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-12-H7_S370_L001_R2_001.fastq
## Encountered 3148 unique sequences from 8964 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-13-H6_S371_L001_R2_001.fastq
## Encountered 4394 unique sequences from 12999 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-13-H8_S372_L001_R2_001.fastq
## Encountered 3207 unique sequences from 8254 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-13-H9_S373_L001_R2_001.fastq
## Encountered 1387 unique sequences from 3377 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-14-H3_S374_L001_R2_001.fastq
## Encountered 4315 unique sequences from 15274 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-14-H7_S375_L001_R2_001.fastq
## Encountered 5203 unique sequences from 14310 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-14-H9_S376_L001_R2_001.fastq
## Encountered 4089 unique sequences from 12646 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-16-H5_S377_L001_R2_001.fastq
## Encountered 3959 unique sequences from 11967 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-24-H6_S378_L001_R2_001.fastq
## Encountered 3968 unique sequences from 8763 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-24-H8_S379_L001_R2_001.fastq
## Encountered 4212 unique sequences from 9782 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-25-H2_S380_L001_R2_001.fastq
## Encountered 5800 unique sequences from 13012 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-25-H4_S381_L001_R2_001.fastq
## Encountered 2997 unique sequences from 5350 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-26-H1_S382_L001_R2_001.fastq
## Encountered 3006 unique sequences from 5962 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-26-H7_S383_L001_R2_001.fastq
## Encountered 4938 unique sequences from 10330 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-27-H3_S384_L001_R2_001.fastq
## Encountered 2935 unique sequences from 6278 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-27-H5_S385_L001_R2_001.fastq
## Encountered 5351 unique sequences from 11398 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-8-H1_S386_L001_R2_001.fastq
## Encountered 3532 unique sequences from 9094 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-8-H2_S387_L001_R2_001.fastq
## Encountered 3028 unique sequences from 10848 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-8-H4_S388_L001_R2_001.fastq
## Encountered 4611 unique sequences from 11980 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-9-H2_S389_L001_R2_001.fastq
## Encountered 8967 unique sequences from 23277 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-6-9-H4_S390_L001_R2_001.fastq
## Encountered 3559 unique sequences from 7104 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-15-H6_S391_L001_R2_001.fastq
## Encountered 6250 unique sequences from 18724 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-16-H4_S392_L001_R2_001.fastq
## Encountered 4292 unique sequences from 13047 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-17-H1_S393_L001_R2_001.fastq
## Encountered 7708 unique sequences from 21557 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-18-H3_S394_L001_R2_001.fastq
## Encountered 3855 unique sequences from 7624 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-18-H7_S395_L001_R2_001.fastq
## Encountered 3019 unique sequences from 8641 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-29-H5_S396_L001_R2_001.fastq
## Encountered 3345 unique sequences from 8843 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-29-H7_S397_L001_R2_001.fastq
## Encountered 6103 unique sequences from 19278 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-30-H8_S398_L001_R2_001.fastq
## Encountered 2558 unique sequences from 8704 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-30-H9_S399_L001_R2_001.fastq
## Encountered 1984 unique sequences from 4403 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-5-H1_S400_L001_R2_001.fastq
## Encountered 2405 unique sequences from 6719 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-5-H2_S401_L001_R2_001.fastq
## Encountered 1796 unique sequences from 4171 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-6-H6_S403_L001_R2_001.fastq
## Encountered 4222 unique sequences from 11259 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-6-H8_S404_L001_R2_001.fastq
## Encountered 7038 unique sequences from 19324 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-6-H9_S405_L001_R2_001.fastq
## Encountered 7783 unique sequences from 22375 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-8-H3_S406_L001_R2_001.fastq
## Encountered 2745 unique sequences from 7103 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-8-H5_S407_L001_R2_001.fastq
## Encountered 3779 unique sequences from 12450 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-7-8-H7_S408_L001_R2_001.fastq
## Encountered 7183 unique sequences from 22286 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H2_S409_L001_R2_001.fastq
## Encountered 4948 unique sequences from 15351 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H5_S410_L001_R2_001.fastq
## Encountered 6095 unique sequences from 13489 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H6_S411_L001_R2_001.fastq
## Encountered 4031 unique sequences from 11447 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H7_S412_L001_R2_001.fastq
## Encountered 9038 unique sequences from 21306 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H8_S413_L001_R2_001.fastq
## Encountered 4366 unique sequences from 11628 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-2023-8-4-H9_S414_L001_R2_001.fastq
## Encountered 269 unique sequences from 344 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Ba001_S415_L001_R2_001.fastq
## Encountered 3906 unique sequences from 11589 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Ba002_S416_L001_R2_001.fastq
## Encountered 3979 unique sequences from 8068 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Ba003_S417_L001_R2_001.fastq
## Encountered 2369 unique sequences from 7634 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb001_S418_L001_R2_001.fastq
## Encountered 4114 unique sequences from 13734 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb002_S419_L001_R2_001.fastq
## Encountered 5382 unique sequences from 19049 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb003_S420_L001_R2_001.fastq
## Encountered 8722 unique sequences from 25904 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb004_S421_L001_R2_001.fastq
## Encountered 8655 unique sequences from 23070 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb005_S422_L001_R2_001.fastq
## Encountered 4170 unique sequences from 11519 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb007_S423_L001_R2_001.fastq
## Encountered 4062 unique sequences from 12125 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb008_S424_L001_R2_001.fastq
## Encountered 2318 unique sequences from 6567 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb009_S425_L001_R2_001.fastq
## Encountered 3125 unique sequences from 8599 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb010_S426_L001_R2_001.fastq
## Encountered 3905 unique sequences from 10777 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb011_S427_L001_R2_001.fastq
## Encountered 4056 unique sequences from 8051 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb012_S428_L001_R2_001.fastq
## Encountered 9202 unique sequences from 27231 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb013_S429_L001_R2_001.fastq
## Encountered 2889 unique sequences from 6172 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb014_S430_L001_R2_001.fastq
## Encountered 5348 unique sequences from 15434 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb015_S431_L001_R2_001.fastq
## Encountered 2924 unique sequences from 7061 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb016_S432_L001_R2_001.fastq
## Encountered 2203 unique sequences from 6078 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb017_S433_L001_R2_001.fastq
## Encountered 3042 unique sequences from 7797 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb018_S434_L001_R2_001.fastq
## Encountered 6408 unique sequences from 16225 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb019_S435_L001_R2_001.fastq
## Encountered 5803 unique sequences from 19040 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb020_S436_L001_R2_001.fastq
## Encountered 5081 unique sequences from 20153 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb021_S437_L001_R2_001.fastq
## Encountered 3200 unique sequences from 6625 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb022_S438_L001_R2_001.fastq
## Encountered 4318 unique sequences from 15270 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb023_S439_L001_R2_001.fastq
## Encountered 2922 unique sequences from 8291 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb024_S440_L001_R2_001.fastq
## Encountered 4733 unique sequences from 15599 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bb025_S441_L001_R2_001.fastq
## Encountered 3079 unique sequences from 9402 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bf001_S442_L001_R2_001.fastq
## Encountered 6606 unique sequences from 18214 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bf002_S443_L001_R2_001.fastq
## Encountered 3 unique sequences from 4 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bf003_S444_L001_R2_001.fastq
## Encountered 3745 unique sequences from 10825 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bf004_S445_L001_R2_001.fastq
## Encountered 2303 unique sequences from 5494 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg001_S446_L001_R2_001.fastq
## Encountered 2221 unique sequences from 6774 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg002_S447_L001_R2_001.fastq
## Encountered 4217 unique sequences from 9593 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg003_S448_L001_R2_001.fastq
## Encountered 4455 unique sequences from 9878 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg004_S449_L001_R2_001.fastq
## Encountered 2797 unique sequences from 8208 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg005_S450_L001_R2_001.fastq
## Encountered 5003 unique sequences from 11754 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg006_S451_L001_R2_001.fastq
## Encountered 4574 unique sequences from 14258 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg007_S452_L001_R2_001.fastq
## Encountered 4175 unique sequences from 13163 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg008_S453_L001_R2_001.fastq
## Encountered 4859 unique sequences from 12164 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg009_S454_L001_R2_001.fastq
## Encountered 2931 unique sequences from 5238 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg010_S455_L001_R2_001.fastq
## Encountered 3208 unique sequences from 6285 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg011_S456_L001_R2_001.fastq
## Encountered 4042 unique sequences from 10184 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg012_S457_L001_R2_001.fastq
## Encountered 2512 unique sequences from 7157 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg013_S458_L001_R2_001.fastq
## Encountered 3832 unique sequences from 12665 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg014_S459_L001_R2_001.fastq
## Encountered 3482 unique sequences from 12895 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg015_S460_L001_R2_001.fastq
## Encountered 3024 unique sequences from 6987 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg016_S461_L001_R2_001.fastq
## Encountered 2581 unique sequences from 5489 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg017_S462_L001_R2_001.fastq
## Encountered 2079 unique sequences from 5605 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg018_S463_L001_R2_001.fastq
## Encountered 2742 unique sequences from 6740 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bg019_S464_L001_R2_001.fastq
## Encountered 3018 unique sequences from 8707 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi001_S465_L001_R2_001.fastq
## Encountered 3309 unique sequences from 10410 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi002_S466_L001_R2_001.fastq
## Encountered 4944 unique sequences from 13359 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi003_S467_L001_R2_001.fastq
## Encountered 7625 unique sequences from 20553 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi004_S468_L001_R2_001.fastq
## Encountered 6359 unique sequences from 18623 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi005_S469_L001_R2_001.fastq
## Encountered 62 unique sequences from 149 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi006_S470_L001_R2_001.fastq
## Encountered 2038 unique sequences from 4554 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-Bi007_S471_L001_R2_001.fastq
## Encountered 4492 unique sequences from 9976 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-CKC0001_S472_L001_R2_001.fastq
## Encountered 2291 unique sequences from 7833 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ESE0004_S473_L001_R2_001.fastq
## Encountered 3182 unique sequences from 9116 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-20230909_S474_L001_R2_001.fastq
## Encountered 1 unique sequences from 3 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-20231007_S477_L001_R2_001.fastq
## Encountered 3 unique sequences from 3 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-20231008_S478_L001_R2_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024220A_S480_L001_R2_001.fastq
## Encountered 25 unique sequences from 37 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024220B_S481_L001_R2_001.fastq
## Encountered 969 unique sequences from 2165 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024221A_S482_L001_R2_001.fastq
## Encountered 1253 unique sequences from 2878 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024221B_S483_L001_R2_001.fastq
## Encountered 281 unique sequences from 622 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024222A_S484_L001_R2_001.fastq
## Encountered 411 unique sequences from 823 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024222B_S485_L001_R2_001.fastq
## Encountered 2420 unique sequences from 6313 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024312A_S486_L001_R2_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024312B_S487_L001_R2_001.fastq
## Encountered 6 unique sequences from 6 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024314A_S488_L001_R2_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024314B_S489_L001_R2_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-ext-neg-ctrl-2024319_S490_L001_R2_001.fastq
## Encountered 84 unique sequences from 180 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0007_S492_L001_R2_001.fastq
## Encountered 3794 unique sequences from 8407 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0027_S494_L001_R2_001.fastq
## Encountered 3080 unique sequences from 7425 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0044_S495_L001_R2_001.fastq
## Encountered 528 unique sequences from 941 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0045_S496_L001_R2_001.fastq
## Encountered 5511 unique sequences from 16853 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0052_S497_L001_R2_001.fastq
## Encountered 534 unique sequences from 1057 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0054_S498_L001_R2_001.fastq
## Encountered 368 unique sequences from 991 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0055_S499_L001_R2_001.fastq
## Encountered 536 unique sequences from 1345 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0071_S500_L001_R2_001.fastq
## Encountered 2765 unique sequences from 10060 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0095_S501_L001_R2_001.fastq
## Encountered 5254 unique sequences from 18665 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0096_S502_L001_R2_001.fastq
## Encountered 3729 unique sequences from 11780 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0105_S503_L001_R2_001.fastq
## Encountered 5068 unique sequences from 20059 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0106_S504_L001_R2_001.fastq
## Encountered 5117 unique sequences from 13590 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0119_S505_L001_R2_001.fastq
## Encountered 3283 unique sequences from 10904 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0134_S506_L001_R2_001.fastq
## Encountered 3548 unique sequences from 10331 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0135_S507_L001_R2_001.fastq
## Encountered 773 unique sequences from 1762 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0136_S508_L001_R2_001.fastq
## Encountered 3781 unique sequences from 10072 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0137_S509_L001_R2_001.fastq
## Encountered 8283 unique sequences from 21450 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0138_S510_L001_R2_001.fastq
## Encountered 3769 unique sequences from 10763 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0139_S511_L001_R2_001.fastq
## Encountered 4373 unique sequences from 10490 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0150_S512_L001_R2_001.fastq
## Encountered 5095 unique sequences from 13943 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0153_S513_L001_R2_001.fastq
## Encountered 1536 unique sequences from 3156 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0155_S514_L001_R2_001.fastq
## Encountered 4967 unique sequences from 15382 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0156_S515_L001_R2_001.fastq
## Encountered 9150 unique sequences from 27268 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0159_S516_L001_R2_001.fastq
## Encountered 7054 unique sequences from 19126 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0163_S517_L001_R2_001.fastq
## Encountered 3652 unique sequences from 8259 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0165_S518_L001_R2_001.fastq
## Encountered 3258 unique sequences from 7382 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0167_S519_L001_R2_001.fastq
## Encountered 4721 unique sequences from 12356 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0168_S520_L001_R2_001.fastq
## Encountered 2768 unique sequences from 9986 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0169_S521_L001_R2_001.fastq
## Encountered 7078 unique sequences from 18064 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0170_S522_L001_R2_001.fastq
## Encountered 3738 unique sequences from 9654 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0200_S523_L001_R2_001.fastq
## Encountered 3915 unique sequences from 14566 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0201_S524_L001_R2_001.fastq
## Encountered 7435 unique sequences from 23716 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0205_S525_L001_R2_001.fastq
## Encountered 4685 unique sequences from 12367 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0209_S526_L001_R2_001.fastq
## Encountered 1181 unique sequences from 2911 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0221_S527_L001_R2_001.fastq
## Encountered 3655 unique sequences from 8200 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0224_S528_L001_R2_001.fastq
## Encountered 3144 unique sequences from 11029 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0225_S529_L001_R2_001.fastq
## Encountered 5684 unique sequences from 15454 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0227_S530_L001_R2_001.fastq
## Encountered 2821 unique sequences from 9704 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0241_S531_L001_R2_001.fastq
## Encountered 5510 unique sequences from 13922 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0244_S532_L001_R2_001.fastq
## Encountered 5029 unique sequences from 12161 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0246_S533_L001_R2_001.fastq
## Encountered 2729 unique sequences from 5407 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0248_S534_L001_R2_001.fastq
## Encountered 8228 unique sequences from 23791 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0253_S535_L001_R2_001.fastq
## Encountered 4999 unique sequences from 14833 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0254_S536_L001_R2_001.fastq
## Encountered 3663 unique sequences from 9069 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0256_S493_L001_R2_001.fastq
## Encountered 8791 unique sequences from 26403 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0259_S537_L001_R2_001.fastq
## Encountered 4608 unique sequences from 14649 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0263_S538_L001_R2_001.fastq
## Encountered 673 unique sequences from 1481 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0266_S539_L001_R2_001.fastq
## Encountered 469 unique sequences from 896 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-KLS0272_S540_L001_R2_001.fastq
## Encountered 688 unique sequences from 977 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-crtl-20240417_S541_L001_R2_001.fastq
## Encountered 1046 unique sequences from 2444 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-ctrl-20240409_S542_L001_R2_001.fastq
## Encountered 194 unique sequences from 462 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-ctrl-20240418A_S543_L001_R2_001.fastq
## Encountered 3841 unique sequences from 8900 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-ctrl-20240418B_S544_L001_R2_001.fastq
## Encountered 21 unique sequences from 43 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-pcr-rbcL-neg-ctrl-20240523_S545_L001_R2_001.fastq
## Encountered 1 unique sequences from 1 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-rbcL-pcr-neg-ctrl-20231021-20231119_S548_L001_R2_001.fastq
## Encountered 2 unique sequences from 3 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-SCA0009_S551_L001_R2_001.fastq
## Encountered 6199 unique sequences from 18374 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-SCA0010_S552_L001_R2_001.fastq
## Encountered 4800 unique sequences from 13370 total sequences read.
## Dereplicating sequence entries in Fastq file: /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered/rbcL-SCA0013_S553_L001_R2_001.fastq
## Encountered 6859 unique sequences from 18429 total sequences read.
# Note that the dereplicated sequences exist only in the R environment; they are not saved to a separate output subdirectory.

Updating sample names (after samples drop out)

Extract sample names from filtFs (so that only samples that passed the previous filter are included)

# My file names have 'junk' at the beginning and end of the file name: a marker
# prefix ("rbcL-") in front and a "_S<n>_L001_R1_001.fastq" suffix behind.
# The calls below peel these off one step at a time, using file 241 as the example.
                  basename(filtFs[241])
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240418B_S544_L001_R1_001.fastq"
         strsplit(basename(filtFs[241]),"_S")
## [[1]]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240418B" "544_L001_R1_001.fastq"
         strsplit(basename(filtFs[241]),"_S")[[1]][1]
## [1] "rbcL-pcr-rbcL-neg-ctrl-20240418B"
strsplit(strsplit(basename(filtFs[241]),"_S")[[1]][1],"-")[[1]]
## [1] "rbcL"      "pcr"       "rbcL"      "neg"       "ctrl"      "20240418B"
# The structure of the names (especially the number of dash-separated pieces) differs
# between worker samples, queen samples, extraction negative controls, and PCR negative
# controls, so drop only the first piece (the marker prefix) and re-join whatever
# remains with underscores.
paste(strsplit(strsplit(basename(filtFs[241]),"_S")[[1]][1],"-")[[1]][-1],collapse="_")
## [1] "pcr_rbcL_neg_ctrl_20240418B"
paste(strsplit(strsplit(basename(filtFs[226]),"_S")[[1]][1],"-")[[1]][-1],collapse="_")
## [1] "KLS0227"
paste(strsplit(strsplit(basename(filtFs[176]),"_S")[[1]][1],"-")[[1]][-1],collapse="_")
## [1] "ext_neg_ctrl_20231007"
paste(strsplit(strsplit(basename(filtFs[1]),  "_S")[[1]][1],"-")[[1]][-1],collapse="_")
## [1] "2020_6_16_H1"
# Make a simple function to replicate the steps above: build a clean sample name
# from a fastq file path.
#
# fname: path (or bare file name) of a fastq file; only the first element is used.
# Returns a single string: the base name with the trailing
#   "_S<number>_L<lane>_..." run suffix removed, the first dash-separated piece
#   (the marker prefix, e.g. "rbcL") dropped, and the remaining pieces joined by "_".
get.sample.name <- function(fname) {
  base <- basename(fname[1])

  # Anchor on "_S<digits>_L<digits>_" instead of a bare "_S", so that a sample
  # name which itself contains "_S" (e.g. "pool_Spring") is not cut short.
  run_suffix <- "_S[0-9]+_L[0-9]+_.*$"
  if (grepl(run_suffix, base)) {
    stem <- sub(run_suffix, "", base)
  } else {
    # No recognisable run suffix: keep the original behaviour of cutting at the
    # first "_S".
    stem <- strsplit(base, "_S")[[1]][1]
  }

  # Drop the leading marker prefix and re-join the remaining pieces.
  pieces <- strsplit(stem, "-")[[1]]
  paste(pieces[-1], collapse = "_")
}

# Derive one sample name per filtered forward-read file. vapply() (rather than
# sapply()) guarantees a character vector, even when filtFs is empty, and errors
# if any file does not yield exactly one name.
sample.names <- unname(vapply(filtFs, get.sample.name, character(1)))
head(sample.names)
## [1] "2020_6_16_H1" "2020_6_16_H5" "2020_6_16_H6" "2020_6_17_H2" "2020_6_17_H4"
## [6] "2020_6_17_H8"
length(sample.names)
## [1] 246
# Name the dereplicated class objects by the sample names
# NOTE(review): the names come from the forward files only; this assumes derepFs
# and derepRs are in the same sample order as filtFs — confirm.
names(derepFs) <- sample.names
names(derepRs) <- sample.names

Denoise reads to resolve exact sequences with dada2

At this step, the core sample inference algorithm is applied to the dereplicated sequences from /scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/rbcL/cutadapt/filtered (remember that the dereplicated sequences only exist in the R environment)

DADA2 infers sample sequences exactly, resolving differences of as few as 1 nucleotide, using the error-rate models we learned in the previous step.

# Run the DADA2 sample inference algorithm on the dereplicated forward reads,
# using the forward error model errF (presumably learned in an earlier step —
# confirm) with multithreading enabled. One progress line is printed per sample.
dadaFs <- dada(derepFs, err = errF, multithread = TRUE)
## Sample 1 - 9241 reads in 1458 unique sequences.
## Sample 2 - 5929 reads in 1990 unique sequences.
## Sample 3 - 850 reads in 242 unique sequences.
## Sample 4 - 4559 reads in 1916 unique sequences.
## Sample 5 - 2027 reads in 580 unique sequences.
## Sample 6 - 15437 reads in 2426 unique sequences.
## Sample 7 - 4021 reads in 1391 unique sequences.
## Sample 8 - 8636 reads in 1622 unique sequences.
## Sample 9 - 4778 reads in 959 unique sequences.
## Sample 10 - 15258 reads in 3209 unique sequences.
## Sample 11 - 17366 reads in 4372 unique sequences.
## Sample 12 - 17405 reads in 3275 unique sequences.
## Sample 13 - 12116 reads in 2994 unique sequences.
## Sample 14 - 12313 reads in 2674 unique sequences.
## Sample 15 - 12755 reads in 2500 unique sequences.
## Sample 16 - 851 reads in 355 unique sequences.
## Sample 17 - 350 reads in 166 unique sequences.
## Sample 18 - 8162 reads in 2544 unique sequences.
## Sample 19 - 16183 reads in 3350 unique sequences.
## Sample 20 - 13260 reads in 2798 unique sequences.
## Sample 21 - 12153 reads in 2281 unique sequences.
## Sample 22 - 13295 reads in 3378 unique sequences.
## Sample 23 - 2 reads in 2 unique sequences.
## Sample 24 - 13616 reads in 2738 unique sequences.
## Sample 25 - 1 reads in 1 unique sequences.
## Sample 26 - 1 reads in 1 unique sequences.
## Sample 27 - 8785 reads in 1485 unique sequences.
## Sample 28 - 36464 reads in 5234 unique sequences.
## Sample 29 - 8836 reads in 1441 unique sequences.
## Sample 30 - 1 reads in 1 unique sequences.
## Sample 31 - 7246 reads in 1382 unique sequences.
## Sample 32 - 9836 reads in 2149 unique sequences.
## Sample 33 - 14023 reads in 2901 unique sequences.
## Sample 34 - 14260 reads in 2254 unique sequences.
## Sample 35 - 11753 reads in 2637 unique sequences.
## Sample 36 - 12176 reads in 1857 unique sequences.
## Sample 37 - 11728 reads in 2604 unique sequences.
## Sample 38 - 10651 reads in 1602 unique sequences.
## Sample 39 - 12915 reads in 2309 unique sequences.
## Sample 40 - 16187 reads in 2961 unique sequences.
## Sample 41 - 17032 reads in 2412 unique sequences.
## Sample 42 - 13821 reads in 2377 unique sequences.
## Sample 43 - 15729 reads in 2963 unique sequences.
## Sample 44 - 19232 reads in 1811 unique sequences.
## Sample 45 - 12550 reads in 3128 unique sequences.
## Sample 46 - 10169 reads in 1877 unique sequences.
## Sample 47 - 6196 reads in 1527 unique sequences.
## Sample 48 - 10332 reads in 2508 unique sequences.
## Sample 49 - 9104 reads in 1528 unique sequences.
## Sample 50 - 11907 reads in 2284 unique sequences.
## Sample 51 - 715 reads in 191 unique sequences.
## Sample 52 - 11218 reads in 2264 unique sequences.
## Sample 53 - 4717 reads in 1183 unique sequences.
## Sample 54 - 4547 reads in 1069 unique sequences.
## Sample 55 - 1152 reads in 440 unique sequences.
## Sample 56 - 2834 reads in 1090 unique sequences.
## Sample 57 - 3400 reads in 1094 unique sequences.
## Sample 58 - 7362 reads in 1689 unique sequences.
## Sample 59 - 11939 reads in 2915 unique sequences.
## Sample 60 - 10457 reads in 2123 unique sequences.
## Sample 61 - 22653 reads in 2658 unique sequences.
## Sample 62 - 4 reads in 4 unique sequences.
## Sample 63 - 9282 reads in 1642 unique sequences.
## Sample 64 - 12630 reads in 1689 unique sequences.
## Sample 65 - 1 reads in 1 unique sequences.
## Sample 66 - 13905 reads in 2779 unique sequences.
## Sample 67 - 13693 reads in 3232 unique sequences.
## Sample 68 - 15342 reads in 1829 unique sequences.
## Sample 69 - 11288 reads in 1860 unique sequences.
## Sample 70 - 6412 reads in 1177 unique sequences.
## Sample 71 - 14325 reads in 2436 unique sequences.
## Sample 72 - 8964 reads in 1678 unique sequences.
## Sample 73 - 12999 reads in 2391 unique sequences.
## Sample 74 - 8254 reads in 1918 unique sequences.
## Sample 75 - 3377 reads in 921 unique sequences.
## Sample 76 - 15274 reads in 2373 unique sequences.
## Sample 77 - 14310 reads in 2981 unique sequences.
## Sample 78 - 12646 reads in 2371 unique sequences.
## Sample 79 - 11967 reads in 2142 unique sequences.
## Sample 80 - 8763 reads in 2587 unique sequences.
## Sample 81 - 9782 reads in 2773 unique sequences.
## Sample 82 - 13012 reads in 3801 unique sequences.
## Sample 83 - 5350 reads in 2176 unique sequences.
## Sample 84 - 5962 reads in 2165 unique sequences.
## Sample 85 - 10330 reads in 3472 unique sequences.
## Sample 86 - 6278 reads in 1942 unique sequences.
## Sample 87 - 11398 reads in 3675 unique sequences.
## Sample 88 - 9094 reads in 1906 unique sequences.
## Sample 89 - 10848 reads in 1706 unique sequences.
## Sample 90 - 11980 reads in 2437 unique sequences.
## Sample 91 - 23277 reads in 5129 unique sequences.
## Sample 92 - 7104 reads in 2379 unique sequences.
## Sample 93 - 18724 reads in 3223 unique sequences.
## Sample 94 - 13047 reads in 2230 unique sequences.
## Sample 95 - 21557 reads in 4109 unique sequences.
## Sample 96 - 7624 reads in 2315 unique sequences.
## Sample 97 - 8641 reads in 1583 unique sequences.
## Sample 98 - 8843 reads in 1829 unique sequences.
## Sample 99 - 19278 reads in 3136 unique sequences.
## Sample 100 - 8704 reads in 1142 unique sequences.
## Sample 101 - 4403 reads in 1078 unique sequences.
## Sample 102 - 6719 reads in 1269 unique sequences.
## Sample 103 - 4171 reads in 1042 unique sequences.
## Sample 104 - 11259 reads in 2242 unique sequences.
## Sample 105 - 19324 reads in 3890 unique sequences.
## Sample 106 - 22375 reads in 4267 unique sequences.
## Sample 107 - 7103 reads in 1435 unique sequences.
## Sample 108 - 12450 reads in 1913 unique sequences.
## Sample 109 - 22286 reads in 3427 unique sequences.
## Sample 110 - 15351 reads in 2576 unique sequences.
## Sample 111 - 13489 reads in 3942 unique sequences.
## Sample 112 - 11447 reads in 2186 unique sequences.
## Sample 113 - 21306 reads in 5257 unique sequences.
## Sample 114 - 11628 reads in 2240 unique sequences.
## Sample 115 - 344 reads in 232 unique sequences.
## Sample 116 - 11589 reads in 1936 unique sequences.
## Sample 117 - 8068 reads in 2555 unique sequences.
## Sample 118 - 7634 reads in 1150 unique sequences.
## Sample 119 - 13734 reads in 2073 unique sequences.
## Sample 120 - 19049 reads in 2733 unique sequences.
## Sample 121 - 25904 reads in 4817 unique sequences.
## Sample 122 - 23070 reads in 4831 unique sequences.
## Sample 123 - 11519 reads in 2293 unique sequences.
## Sample 124 - 12125 reads in 2107 unique sequences.
## Sample 125 - 6567 reads in 1172 unique sequences.
## Sample 126 - 8599 reads in 1672 unique sequences.
## Sample 127 - 10777 reads in 1976 unique sequences.
## Sample 128 - 8051 reads in 2442 unique sequences.
## Sample 129 - 27231 reads in 4932 unique sequences.
## Sample 130 - 6172 reads in 1573 unique sequences.
## Sample 131 - 15434 reads in 2922 unique sequences.
## Sample 132 - 7061 reads in 1641 unique sequences.
## Sample 133 - 6078 reads in 1258 unique sequences.
## Sample 134 - 7797 reads in 1724 unique sequences.
## Sample 135 - 16225 reads in 3752 unique sequences.
## Sample 136 - 19040 reads in 2852 unique sequences.
## Sample 137 - 20153 reads in 2300 unique sequences.
## Sample 138 - 6625 reads in 1942 unique sequences.
## Sample 139 - 15270 reads in 2128 unique sequences.
## Sample 140 - 8291 reads in 1651 unique sequences.
## Sample 141 - 15599 reads in 2620 unique sequences.
## Sample 142 - 9402 reads in 1501 unique sequences.
## Sample 143 - 18214 reads in 3433 unique sequences.
## Sample 144 - 4 reads in 3 unique sequences.
## Sample 145 - 10825 reads in 1932 unique sequences.
## Sample 146 - 5494 reads in 1389 unique sequences.
## Sample 147 - 6774 reads in 1199 unique sequences.
## Sample 148 - 9593 reads in 2431 unique sequences.
## Sample 149 - 9878 reads in 2390 unique sequences.
## Sample 150 - 8208 reads in 1476 unique sequences.
## Sample 151 - 11754 reads in 3025 unique sequences.
## Sample 152 - 14258 reads in 2148 unique sequences.
## Sample 153 - 13163 reads in 1728 unique sequences.
## Sample 154 - 12164 reads in 2815 unique sequences.
## Sample 155 - 5238 reads in 2005 unique sequences.
## Sample 156 - 6285 reads in 2092 unique sequences.
## Sample 157 - 10184 reads in 2397 unique sequences.
## Sample 158 - 7157 reads in 1236 unique sequences.
## Sample 159 - 12665 reads in 1739 unique sequences.
## Sample 160 - 12895 reads in 1893 unique sequences.
## Sample 161 - 6987 reads in 1793 unique sequences.
## Sample 162 - 5489 reads in 1522 unique sequences.
## Sample 163 - 5605 reads in 1124 unique sequences.
## Sample 164 - 6740 reads in 1468 unique sequences.
## Sample 165 - 8707 reads in 1534 unique sequences.
## Sample 166 - 10410 reads in 1691 unique sequences.
## Sample 167 - 13359 reads in 2813 unique sequences.
## Sample 168 - 20553 reads in 4425 unique sequences.
## Sample 169 - 18623 reads in 3192 unique sequences.
## Sample 170 - 149 reads in 36 unique sequences.
## Sample 171 - 4554 reads in 1268 unique sequences.
## Sample 172 - 9976 reads in 2803 unique sequences.
## Sample 173 - 7833 reads in 1046 unique sequences.
## Sample 174 - 9116 reads in 1588 unique sequences.
## Sample 175 - 3 reads in 2 unique sequences.
## Sample 176 - 3 reads in 3 unique sequences.
## Sample 177 - 1 reads in 1 unique sequences.
## Sample 178 - 37 reads in 26 unique sequences.
## Sample 179 - 2165 reads in 518 unique sequences.
## Sample 180 - 2878 reads in 686 unique sequences.
## Sample 181 - 622 reads in 151 unique sequences.
## Sample 182 - 823 reads in 198 unique sequences.
## Sample 183 - 6313 reads in 1232 unique sequences.
## Sample 184 - 1 reads in 1 unique sequences.
## Sample 185 - 6 reads in 6 unique sequences.
## Sample 186 - 1 reads in 1 unique sequences.
## Sample 187 - 1 reads in 1 unique sequences.
## Sample 188 - 180 reads in 43 unique sequences.
## Sample 189 - 8407 reads in 2322 unique sequences.
## Sample 190 - 7425 reads in 1596 unique sequences.
## Sample 191 - 941 reads in 286 unique sequences.
## Sample 192 - 16853 reads in 2717 unique sequences.
## Sample 193 - 1057 reads in 254 unique sequences.
## Sample 194 - 991 reads in 179 unique sequences.
## Sample 195 - 1345 reads in 233 unique sequences.
## Sample 196 - 10060 reads in 1254 unique sequences.
## Sample 197 - 18665 reads in 2376 unique sequences.
## Sample 198 - 11780 reads in 1976 unique sequences.
## Sample 199 - 20059 reads in 2328 unique sequences.
## Sample 200 - 13590 reads in 2956 unique sequences.
## Sample 201 - 10904 reads in 1553 unique sequences.
## Sample 202 - 10331 reads in 1712 unique sequences.
## Sample 203 - 1762 reads in 452 unique sequences.
## Sample 204 - 10072 reads in 2012 unique sequences.
## Sample 205 - 21450 reads in 4607 unique sequences.
## Sample 206 - 10763 reads in 1797 unique sequences.
## Sample 207 - 10490 reads in 2508 unique sequences.
## Sample 208 - 13943 reads in 2725 unique sequences.
## Sample 209 - 3156 reads in 1076 unique sequences.
## Sample 210 - 15382 reads in 2738 unique sequences.
## Sample 211 - 27268 reads in 4653 unique sequences.
## Sample 212 - 19126 reads in 3951 unique sequences.
## Sample 213 - 8259 reads in 2418 unique sequences.
## Sample 214 - 7382 reads in 2021 unique sequences.
## Sample 215 - 12356 reads in 2582 unique sequences.
## Sample 216 - 9986 reads in 1500 unique sequences.
## Sample 217 - 18064 reads in 4242 unique sequences.
## Sample 218 - 9654 reads in 2102 unique sequences.
## Sample 219 - 14566 reads in 1968 unique sequences.
## Sample 220 - 23716 reads in 4120 unique sequences.
## Sample 221 - 12367 reads in 3134 unique sequences.
## Sample 222 - 2911 reads in 685 unique sequences.
## Sample 223 - 8200 reads in 2247 unique sequences.
## Sample 224 - 11029 reads in 1547 unique sequences.
## Sample 225 - 15454 reads in 3266 unique sequences.
## Sample 226 - 9704 reads in 1554 unique sequences.
## Sample 227 - 13922 reads in 2935 unique sequences.
## Sample 228 - 12161 reads in 3025 unique sequences.
## Sample 229 - 5407 reads in 1672 unique sequences.
## Sample 230 - 23791 reads in 4225 unique sequences.
## Sample 231 - 14833 reads in 2633 unique sequences.
## Sample 232 - 9069 reads in 2234 unique sequences.
## Sample 233 - 26403 reads in 5437 unique sequences.
## Sample 234 - 14649 reads in 2285 unique sequences.
## Sample 235 - 1481 reads in 413 unique sequences.
## Sample 236 - 896 reads in 294 unique sequences.
## Sample 237 - 977 reads in 506 unique sequences.
## Sample 238 - 2444 reads in 599 unique sequences.
## Sample 239 - 462 reads in 111 unique sequences.
## Sample 240 - 8900 reads in 2234 unique sequences.
## Sample 241 - 43 reads in 21 unique sequences.
## Sample 242 - 1 reads in 1 unique sequences.
## Sample 243 - 3 reads in 3 unique sequences.
## Sample 244 - 18374 reads in 3186 unique sequences.
## Sample 245 - 13370 reads in 2303 unique sequences.
## Sample 246 - 18429 reads in 3542 unique sequences.
# Sample inference on the reverse reads: run dada() over the reverse-read
# dereplicated input (derepRs) with the reverse error model (errR),
# using multithreading.
dadaRs <- dada(
  derep = derepRs,
  err = errR,
  multithread = TRUE
)
## Sample 1 - 9241 reads in 3205 unique sequences.
## Sample 2 - 5929 reads in 3022 unique sequences.
## Sample 3 - 850 reads in 397 unique sequences.
## Sample 4 - 4559 reads in 2719 unique sequences.
## Sample 5 - 2027 reads in 930 unique sequences.
## Sample 6 - 15437 reads in 4895 unique sequences.
## Sample 7 - 4021 reads in 2167 unique sequences.
## Sample 8 - 8636 reads in 3009 unique sequences.
## Sample 9 - 4778 reads in 1876 unique sequences.
## Sample 10 - 15258 reads in 5820 unique sequences.
## Sample 11 - 17366 reads in 7596 unique sequences.
## Sample 12 - 17405 reads in 6427 unique sequences.
## Sample 13 - 12116 reads in 4939 unique sequences.
## Sample 14 - 12313 reads in 4950 unique sequences.
## Sample 15 - 12755 reads in 4654 unique sequences.
## Sample 16 - 851 reads in 517 unique sequences.
## Sample 17 - 350 reads in 234 unique sequences.
## Sample 18 - 8162 reads in 4123 unique sequences.
## Sample 19 - 16183 reads in 5848 unique sequences.
## Sample 20 - 13260 reads in 5067 unique sequences.
## Sample 21 - 12153 reads in 4305 unique sequences.
## Sample 22 - 13295 reads in 5534 unique sequences.
## Sample 23 - 2 reads in 2 unique sequences.
## Sample 24 - 13616 reads in 4682 unique sequences.
## Sample 25 - 1 reads in 1 unique sequences.
## Sample 26 - 1 reads in 1 unique sequences.
## Sample 27 - 8785 reads in 3038 unique sequences.
## Sample 28 - 36464 reads in 11402 unique sequences.
## Sample 29 - 8836 reads in 2804 unique sequences.
## Sample 30 - 1 reads in 1 unique sequences.
## Sample 31 - 7246 reads in 2495 unique sequences.
## Sample 32 - 9836 reads in 4020 unique sequences.
## Sample 33 - 14023 reads in 5076 unique sequences.
## Sample 34 - 14260 reads in 4494 unique sequences.
## Sample 35 - 11753 reads in 4657 unique sequences.
## Sample 36 - 12176 reads in 3597 unique sequences.
## Sample 37 - 11728 reads in 4682 unique sequences.
## Sample 38 - 10651 reads in 3150 unique sequences.
## Sample 39 - 12915 reads in 4269 unique sequences.
## Sample 40 - 16187 reads in 5422 unique sequences.
## Sample 41 - 17032 reads in 5073 unique sequences.
## Sample 42 - 13821 reads in 4537 unique sequences.
## Sample 43 - 15729 reads in 5712 unique sequences.
## Sample 44 - 19232 reads in 4554 unique sequences.
## Sample 45 - 12550 reads in 5336 unique sequences.
## Sample 46 - 10169 reads in 3726 unique sequences.
## Sample 47 - 6196 reads in 2640 unique sequences.
## Sample 48 - 10332 reads in 4159 unique sequences.
## Sample 49 - 9104 reads in 3139 unique sequences.
## Sample 50 - 11907 reads in 4338 unique sequences.
## Sample 51 - 715 reads in 329 unique sequences.
## Sample 52 - 11218 reads in 4133 unique sequences.
## Sample 53 - 4717 reads in 2153 unique sequences.
## Sample 54 - 4547 reads in 1819 unique sequences.
## Sample 55 - 1152 reads in 674 unique sequences.
## Sample 56 - 2834 reads in 1686 unique sequences.
## Sample 57 - 3400 reads in 1664 unique sequences.
## Sample 58 - 7362 reads in 3037 unique sequences.
## Sample 59 - 11939 reads in 4853 unique sequences.
## Sample 60 - 10457 reads in 3897 unique sequences.
## Sample 61 - 22653 reads in 5895 unique sequences.
## Sample 62 - 4 reads in 4 unique sequences.
## Sample 63 - 9282 reads in 3372 unique sequences.
## Sample 64 - 12630 reads in 3592 unique sequences.
## Sample 65 - 1 reads in 1 unique sequences.
## Sample 66 - 13905 reads in 4992 unique sequences.
## Sample 67 - 13693 reads in 5478 unique sequences.
## Sample 68 - 15342 reads in 4123 unique sequences.
## Sample 69 - 11288 reads in 3581 unique sequences.
## Sample 70 - 6412 reads in 2311 unique sequences.
## Sample 71 - 14325 reads in 4893 unique sequences.
## Sample 72 - 8964 reads in 3148 unique sequences.
## Sample 73 - 12999 reads in 4394 unique sequences.
## Sample 74 - 8254 reads in 3207 unique sequences.
## Sample 75 - 3377 reads in 1387 unique sequences.
## Sample 76 - 15274 reads in 4315 unique sequences.
## Sample 77 - 14310 reads in 5203 unique sequences.
## Sample 78 - 12646 reads in 4089 unique sequences.
## Sample 79 - 11967 reads in 3959 unique sequences.
## Sample 80 - 8763 reads in 3968 unique sequences.
## Sample 81 - 9782 reads in 4212 unique sequences.
## Sample 82 - 13012 reads in 5800 unique sequences.
## Sample 83 - 5350 reads in 2997 unique sequences.
## Sample 84 - 5962 reads in 3006 unique sequences.
## Sample 85 - 10330 reads in 4938 unique sequences.
## Sample 86 - 6278 reads in 2935 unique sequences.
## Sample 87 - 11398 reads in 5351 unique sequences.
## Sample 88 - 9094 reads in 3532 unique sequences.
## Sample 89 - 10848 reads in 3028 unique sequences.
## Sample 90 - 11980 reads in 4611 unique sequences.
## Sample 91 - 23277 reads in 8967 unique sequences.
## Sample 92 - 7104 reads in 3559 unique sequences.
## Sample 93 - 18724 reads in 6250 unique sequences.
## Sample 94 - 13047 reads in 4292 unique sequences.
## Sample 95 - 21557 reads in 7708 unique sequences.
## Sample 96 - 7624 reads in 3855 unique sequences.
## Sample 97 - 8641 reads in 3019 unique sequences.
## Sample 98 - 8843 reads in 3345 unique sequences.
## Sample 99 - 19278 reads in 6103 unique sequences.
## Sample 100 - 8704 reads in 2558 unique sequences.
## Sample 101 - 4403 reads in 1984 unique sequences.
## Sample 102 - 6719 reads in 2405 unique sequences.
## Sample 103 - 4171 reads in 1796 unique sequences.
## Sample 104 - 11259 reads in 4222 unique sequences.
## Sample 105 - 19324 reads in 7038 unique sequences.
## Sample 106 - 22375 reads in 7783 unique sequences.
## Sample 107 - 7103 reads in 2745 unique sequences.
## Sample 108 - 12450 reads in 3779 unique sequences.
## Sample 109 - 22286 reads in 7183 unique sequences.
## Sample 110 - 15351 reads in 4948 unique sequences.
## Sample 111 - 13489 reads in 6095 unique sequences.
## Sample 112 - 11447 reads in 4031 unique sequences.
## Sample 113 - 21306 reads in 9038 unique sequences.
## Sample 114 - 11628 reads in 4366 unique sequences.
## Sample 115 - 344 reads in 269 unique sequences.
## Sample 116 - 11589 reads in 3906 unique sequences.
## Sample 117 - 8068 reads in 3979 unique sequences.
## Sample 118 - 7634 reads in 2369 unique sequences.
## Sample 119 - 13734 reads in 4114 unique sequences.
## Sample 120 - 19049 reads in 5382 unique sequences.
## Sample 121 - 25904 reads in 8722 unique sequences.
## Sample 122 - 23070 reads in 8655 unique sequences.
## Sample 123 - 11519 reads in 4170 unique sequences.
## Sample 124 - 12125 reads in 4062 unique sequences.
## Sample 125 - 6567 reads in 2318 unique sequences.
## Sample 126 - 8599 reads in 3125 unique sequences.
## Sample 127 - 10777 reads in 3905 unique sequences.
## Sample 128 - 8051 reads in 4056 unique sequences.
## Sample 129 - 27231 reads in 9202 unique sequences.
## Sample 130 - 6172 reads in 2889 unique sequences.
## Sample 131 - 15434 reads in 5348 unique sequences.
## Sample 132 - 7061 reads in 2924 unique sequences.
## Sample 133 - 6078 reads in 2203 unique sequences.
## Sample 134 - 7797 reads in 3042 unique sequences.
## Sample 135 - 16225 reads in 6408 unique sequences.
## Sample 136 - 19040 reads in 5803 unique sequences.
## Sample 137 - 20153 reads in 5081 unique sequences.
## Sample 138 - 6625 reads in 3200 unique sequences.
## Sample 139 - 15270 reads in 4318 unique sequences.
## Sample 140 - 8291 reads in 2922 unique sequences.
## Sample 141 - 15599 reads in 4733 unique sequences.
## Sample 142 - 9402 reads in 3079 unique sequences.
## Sample 143 - 18214 reads in 6606 unique sequences.
## Sample 144 - 4 reads in 3 unique sequences.
## Sample 145 - 10825 reads in 3745 unique sequences.
## Sample 146 - 5494 reads in 2303 unique sequences.
## Sample 147 - 6774 reads in 2221 unique sequences.
## Sample 148 - 9593 reads in 4217 unique sequences.
## Sample 149 - 9878 reads in 4455 unique sequences.
## Sample 150 - 8208 reads in 2797 unique sequences.
## Sample 151 - 11754 reads in 5003 unique sequences.
## Sample 152 - 14258 reads in 4574 unique sequences.
## Sample 153 - 13163 reads in 4175 unique sequences.
## Sample 154 - 12164 reads in 4859 unique sequences.
## Sample 155 - 5238 reads in 2931 unique sequences.
## Sample 156 - 6285 reads in 3208 unique sequences.
## Sample 157 - 10184 reads in 4042 unique sequences.
## Sample 158 - 7157 reads in 2512 unique sequences.
## Sample 159 - 12665 reads in 3832 unique sequences.
## Sample 160 - 12895 reads in 3482 unique sequences.
## Sample 161 - 6987 reads in 3024 unique sequences.
## Sample 162 - 5489 reads in 2581 unique sequences.
## Sample 163 - 5605 reads in 2079 unique sequences.
## Sample 164 - 6740 reads in 2742 unique sequences.
## Sample 165 - 8707 reads in 3018 unique sequences.
## Sample 166 - 10410 reads in 3309 unique sequences.
## Sample 167 - 13359 reads in 4944 unique sequences.
## Sample 168 - 20553 reads in 7625 unique sequences.
## Sample 169 - 18623 reads in 6359 unique sequences.
## Sample 170 - 149 reads in 62 unique sequences.
## Sample 171 - 4554 reads in 2038 unique sequences.
## Sample 172 - 9976 reads in 4492 unique sequences.
## Sample 173 - 7833 reads in 2291 unique sequences.
## Sample 174 - 9116 reads in 3182 unique sequences.
## Sample 175 - 3 reads in 1 unique sequences.
## Sample 176 - 3 reads in 3 unique sequences.
## Sample 177 - 1 reads in 1 unique sequences.
## Sample 178 - 37 reads in 25 unique sequences.
## Sample 179 - 2165 reads in 969 unique sequences.
## Sample 180 - 2878 reads in 1253 unique sequences.
## Sample 181 - 622 reads in 281 unique sequences.
## Sample 182 - 823 reads in 411 unique sequences.
## Sample 183 - 6313 reads in 2420 unique sequences.
## Sample 184 - 1 reads in 1 unique sequences.
## Sample 185 - 6 reads in 6 unique sequences.
## Sample 186 - 1 reads in 1 unique sequences.
## Sample 187 - 1 reads in 1 unique sequences.
## Sample 188 - 180 reads in 84 unique sequences.
## Sample 189 - 8407 reads in 3794 unique sequences.
## Sample 190 - 7425 reads in 3080 unique sequences.
## Sample 191 - 941 reads in 528 unique sequences.
## Sample 192 - 16853 reads in 5511 unique sequences.
## Sample 193 - 1057 reads in 534 unique sequences.
## Sample 194 - 991 reads in 368 unique sequences.
## Sample 195 - 1345 reads in 536 unique sequences.
## Sample 196 - 10060 reads in 2765 unique sequences.
## Sample 197 - 18665 reads in 5254 unique sequences.
## Sample 198 - 11780 reads in 3729 unique sequences.
## Sample 199 - 20059 reads in 5068 unique sequences.
## Sample 200 - 13590 reads in 5117 unique sequences.
## Sample 201 - 10904 reads in 3283 unique sequences.
## Sample 202 - 10331 reads in 3548 unique sequences.
## Sample 203 - 1762 reads in 773 unique sequences.
## Sample 204 - 10072 reads in 3781 unique sequences.
## Sample 205 - 21450 reads in 8283 unique sequences.
## Sample 206 - 10763 reads in 3769 unique sequences.
## Sample 207 - 10490 reads in 4373 unique sequences.
## Sample 208 - 13943 reads in 5095 unique sequences.
## Sample 209 - 3156 reads in 1536 unique sequences.
## Sample 210 - 15382 reads in 4967 unique sequences.
## Sample 211 - 27268 reads in 9150 unique sequences.
## Sample 212 - 19126 reads in 7054 unique sequences.
## Sample 213 - 8259 reads in 3652 unique sequences.
## Sample 214 - 7382 reads in 3258 unique sequences.
## Sample 215 - 12356 reads in 4721 unique sequences.
## Sample 216 - 9986 reads in 2768 unique sequences.
## Sample 217 - 18064 reads in 7078 unique sequences.
## Sample 218 - 9654 reads in 3738 unique sequences.
## Sample 219 - 14566 reads in 3915 unique sequences.
## Sample 220 - 23716 reads in 7435 unique sequences.
## Sample 221 - 12367 reads in 4685 unique sequences.
## Sample 222 - 2911 reads in 1181 unique sequences.
## Sample 223 - 8200 reads in 3655 unique sequences.
## Sample 224 - 11029 reads in 3144 unique sequences.
## Sample 225 - 15454 reads in 5684 unique sequences.
## Sample 226 - 9704 reads in 2821 unique sequences.
## Sample 227 - 13922 reads in 5510 unique sequences.
## Sample 228 - 12161 reads in 5029 unique sequences.
## Sample 229 - 5407 reads in 2729 unique sequences.
## Sample 230 - 23791 reads in 8228 unique sequences.
## Sample 231 - 14833 reads in 4999 unique sequences.
## Sample 232 - 9069 reads in 3663 unique sequences.
## Sample 233 - 26403 reads in 8791 unique sequences.
## Sample 234 - 14649 reads in 4608 unique sequences.
## Sample 235 - 1481 reads in 673 unique sequences.
## Sample 236 - 896 reads in 469 unique sequences.
## Sample 237 - 977 reads in 688 unique sequences.
## Sample 238 - 2444 reads in 1046 unique sequences.
## Sample 239 - 462 reads in 194 unique sequences.
## Sample 240 - 8900 reads in 3841 unique sequences.
## Sample 241 - 43 reads in 21 unique sequences.
## Sample 242 - 1 reads in 1 unique sequences.
## Sample 243 - 3 reads in 2 unique sequences.
## Sample 244 - 18374 reads in 6199 unique sequences.
## Sample 245 - 13370 reads in 4800 unique sequences.
## Sample 246 - 18429 reads in 6859 unique sequences.

Merge paired reads

We’ve inferred the sample sequences in the forward and reverse reads independently. Now it’s time to merge those inferred sequences together, throwing out any pairs whose forward and reverse reads don’t match in their overlapping region.

# Merge the denoised forward and reverse reads for every sample.
# minOverlap = 11 and maxMismatch = 0 are passed explicitly; verbose = TRUE
# prints a per-sample summary of how many pairs merged.
mergers <- mergePairs(
  dadaF = dadaFs,
  derepF = derepFs,
  dadaR = dadaRs,
  derepR = derepRs,
  minOverlap = 11,
  maxMismatch = 0,
  verbose = TRUE
)
## 8965 paired-reads (in 48 unique pairings) successfully merged out of 9165 (in 107 pairings) input.
## 4858 paired-reads (in 187 unique pairings) successfully merged out of 5713 (in 466 pairings) input.
## 761 paired-reads (in 7 unique pairings) successfully merged out of 823 (in 19 pairings) input.
## 2982 paired-reads (in 181 unique pairings) successfully merged out of 4385 (in 632 pairings) input.
## 1830 paired-reads (in 20 unique pairings) successfully merged out of 1959 (in 50 pairings) input.
## 14791 paired-reads (in 103 unique pairings) successfully merged out of 15299 (in 195 pairings) input.
## 3247 paired-reads (in 104 unique pairings) successfully merged out of 3893 (in 313 pairings) input.
## 8339 paired-reads (in 100 unique pairings) successfully merged out of 8558 (in 177 pairings) input.
## 4320 paired-reads (in 55 unique pairings) successfully merged out of 4728 (in 138 pairings) input.
## 13957 paired-reads (in 195 unique pairings) successfully merged out of 15051 (in 513 pairings) input.
## 15540 paired-reads (in 498 unique pairings) successfully merged out of 17073 (in 1021 pairings) input.
## 16216 paired-reads (in 254 unique pairings) successfully merged out of 17250 (in 507 pairings) input.
## 11003 paired-reads (in 197 unique pairings) successfully merged out of 11996 (in 506 pairings) input.
## 11620 paired-reads (in 250 unique pairings) successfully merged out of 12204 (in 487 pairings) input.
## 12018 paired-reads (in 173 unique pairings) successfully merged out of 12654 (in 382 pairings) input.
## 567 paired-reads (in 16 unique pairings) successfully merged out of 820 (in 65 pairings) input.
## 284 paired-reads (in 10 unique pairings) successfully merged out of 322 (in 20 pairings) input.
## 7061 paired-reads (in 395 unique pairings) successfully merged out of 7910 (in 739 pairings) input.
## 15185 paired-reads (in 202 unique pairings) successfully merged out of 15982 (in 399 pairings) input.
## 12306 paired-reads (in 246 unique pairings) successfully merged out of 13099 (in 519 pairings) input.
## 11510 paired-reads (in 86 unique pairings) successfully merged out of 12041 (in 186 pairings) input.
## 11719 paired-reads (in 267 unique pairings) successfully merged out of 13065 (in 670 pairings) input.
## No paired-reads (in ZERO unique pairings) successfully merged out of 2 pairings) input.
## 12827 paired-reads (in 139 unique pairings) successfully merged out of 13483 (in 337 pairings) input.
## 1 paired-reads (in 1 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 0 paired-reads (in 0 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 8601 paired-reads (in 32 unique pairings) successfully merged out of 8731 (in 88 pairings) input.
## 35105 paired-reads (in 324 unique pairings) successfully merged out of 36129 (in 626 pairings) input.
## 8555 paired-reads (in 43 unique pairings) successfully merged out of 8740 (in 87 pairings) input.
## 0 paired-reads (in 0 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 6958 paired-reads (in 61 unique pairings) successfully merged out of 7189 (in 125 pairings) input.
## 8993 paired-reads (in 163 unique pairings) successfully merged out of 9714 (in 395 pairings) input.
## 13210 paired-reads (in 158 unique pairings) successfully merged out of 13921 (in 338 pairings) input.
## 13769 paired-reads (in 104 unique pairings) successfully merged out of 14142 (in 211 pairings) input.
## 10503 paired-reads (in 243 unique pairings) successfully merged out of 11541 (in 537 pairings) input.
## 11736 paired-reads (in 42 unique pairings) successfully merged out of 12053 (in 129 pairings) input.
## 10687 paired-reads (in 179 unique pairings) successfully merged out of 11514 (in 431 pairings) input.
## 10452 paired-reads (in 63 unique pairings) successfully merged out of 10585 (in 96 pairings) input.
## 12426 paired-reads (in 170 unique pairings) successfully merged out of 12808 (in 284 pairings) input.
## 15566 paired-reads (in 197 unique pairings) successfully merged out of 16031 (in 376 pairings) input.
## 16581 paired-reads (in 116 unique pairings) successfully merged out of 16889 (in 214 pairings) input.
## 13193 paired-reads (in 144 unique pairings) successfully merged out of 13645 (in 266 pairings) input.
## 15055 paired-reads (in 275 unique pairings) successfully merged out of 15531 (in 444 pairings) input.
## 19176 paired-reads (in 5 unique pairings) successfully merged out of 19189 (in 9 pairings) input.
## 11605 paired-reads (in 337 unique pairings) successfully merged out of 12302 (in 659 pairings) input.
## 9643 paired-reads (in 163 unique pairings) successfully merged out of 9965 (in 268 pairings) input.
## 5787 paired-reads (in 154 unique pairings) successfully merged out of 6106 (in 274 pairings) input.
## 9722 paired-reads (in 209 unique pairings) successfully merged out of 10195 (in 400 pairings) input.
## 8741 paired-reads (in 55 unique pairings) successfully merged out of 9031 (in 136 pairings) input.
## 11174 paired-reads (in 181 unique pairings) successfully merged out of 11718 (in 347 pairings) input.
## 664 paired-reads (in 6 unique pairings) successfully merged out of 691 (in 12 pairings) input.
## 10227 paired-reads (in 144 unique pairings) successfully merged out of 11074 (in 365 pairings) input.
## 4340 paired-reads (in 83 unique pairings) successfully merged out of 4621 (in 179 pairings) input.
## 4188 paired-reads (in 53 unique pairings) successfully merged out of 4488 (in 131 pairings) input.
## 920 paired-reads (in 45 unique pairings) successfully merged out of 1127 (in 98 pairings) input.
## 1828 paired-reads (in 87 unique pairings) successfully merged out of 2688 (in 322 pairings) input.
## 2883 paired-reads (in 93 unique pairings) successfully merged out of 3285 (in 200 pairings) input.
## 6788 paired-reads (in 112 unique pairings) successfully merged out of 7181 (in 254 pairings) input.
## 10565 paired-reads (in 292 unique pairings) successfully merged out of 11766 (in 649 pairings) input.
## 9631 paired-reads (in 109 unique pairings) successfully merged out of 10312 (in 279 pairings) input.
## 22525 paired-reads (in 48 unique pairings) successfully merged out of 22615 (in 76 pairings) input.
## 0 paired-reads (in 0 unique pairings) successfully merged out of 3 (in 1 pairings) input.
## 8856 paired-reads (in 33 unique pairings) successfully merged out of 9167 (in 80 pairings) input.
## 12409 paired-reads (in 65 unique pairings) successfully merged out of 12571 (in 105 pairings) input.
## 0 paired-reads (in 0 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 13142 paired-reads (in 193 unique pairings) successfully merged out of 13746 (in 372 pairings) input.
## 12548 paired-reads (in 238 unique pairings) successfully merged out of 13365 (in 556 pairings) input.
## 15258 paired-reads (in 28 unique pairings) successfully merged out of 15294 (in 40 pairings) input.
## 10998 paired-reads (in 55 unique pairings) successfully merged out of 11174 (in 102 pairings) input.
## 6096 paired-reads (in 56 unique pairings) successfully merged out of 6328 (in 124 pairings) input.
## 13614 paired-reads (in 142 unique pairings) successfully merged out of 14241 (in 294 pairings) input.
## 8521 paired-reads (in 50 unique pairings) successfully merged out of 8883 (in 131 pairings) input.
## 12552 paired-reads (in 157 unique pairings) successfully merged out of 12883 (in 299 pairings) input.
## 7644 paired-reads (in 139 unique pairings) successfully merged out of 8190 (in 324 pairings) input.
## 3164 paired-reads (in 37 unique pairings) successfully merged out of 3324 (in 78 pairings) input.
## 14940 paired-reads (in 35 unique pairings) successfully merged out of 15170 (in 85 pairings) input.
## 13267 paired-reads (in 286 unique pairings) successfully merged out of 14165 (in 571 pairings) input.
## 12100 paired-reads (in 136 unique pairings) successfully merged out of 12553 (in 251 pairings) input.
## 11626 paired-reads (in 65 unique pairings) successfully merged out of 11844 (in 138 pairings) input.
## 7464 paired-reads (in 172 unique pairings) successfully merged out of 8422 (in 490 pairings) input.
## 8716 paired-reads (in 210 unique pairings) successfully merged out of 9594 (in 503 pairings) input.
## 11406 paired-reads (in 314 unique pairings) successfully merged out of 12672 (in 746 pairings) input.
## 4293 paired-reads (in 179 unique pairings) successfully merged out of 5111 (in 506 pairings) input.
## 5019 paired-reads (in 153 unique pairings) successfully merged out of 5771 (in 468 pairings) input.
## 8600 paired-reads (in 294 unique pairings) successfully merged out of 9938 (in 755 pairings) input.
## 5413 paired-reads (in 117 unique pairings) successfully merged out of 6051 (in 302 pairings) input.
## 9709 paired-reads (in 274 unique pairings) successfully merged out of 10928 (in 713 pairings) input.
## 8514 paired-reads (in 92 unique pairings) successfully merged out of 8877 (in 238 pairings) input.
## 10661 paired-reads (in 27 unique pairings) successfully merged out of 10800 (in 53 pairings) input.
## 10641 paired-reads (in 162 unique pairings) successfully merged out of 11847 (in 408 pairings) input.
## 21511 paired-reads (in 517 unique pairings) successfully merged out of 22922 (in 1120 pairings) input.
## 5749 paired-reads (in 199 unique pairings) successfully merged out of 6853 (in 545 pairings) input.
## 17494 paired-reads (in 239 unique pairings) successfully merged out of 18496 (in 496 pairings) input.
## 12124 paired-reads (in 138 unique pairings) successfully merged out of 12931 (in 275 pairings) input.
## 20032 paired-reads (in 307 unique pairings) successfully merged out of 21250 (in 699 pairings) input.
## 6429 paired-reads (in 258 unique pairings) successfully merged out of 7377 (in 556 pairings) input.
## 8191 paired-reads (in 72 unique pairings) successfully merged out of 8530 (in 132 pairings) input.
## 8426 paired-reads (in 120 unique pairings) successfully merged out of 8768 (in 242 pairings) input.
## 18638 paired-reads (in 258 unique pairings) successfully merged out of 19048 (in 416 pairings) input.
## 8604 paired-reads (in 20 unique pairings) successfully merged out of 8658 (in 32 pairings) input.
## 4010 paired-reads (in 92 unique pairings) successfully merged out of 4312 (in 195 pairings) input.
## 6483 paired-reads (in 50 unique pairings) successfully merged out of 6634 (in 93 pairings) input.
## 3999 paired-reads (in 94 unique pairings) successfully merged out of 4121 (in 155 pairings) input.
## 10538 paired-reads (in 135 unique pairings) successfully merged out of 11119 (in 311 pairings) input.
## 18240 paired-reads (in 299 unique pairings) successfully merged out of 18965 (in 515 pairings) input.
## 21192 paired-reads (in 324 unique pairings) successfully merged out of 22038 (in 608 pairings) input.
## 6815 paired-reads (in 78 unique pairings) successfully merged out of 7040 (in 156 pairings) input.
## 12219 paired-reads (in 86 unique pairings) successfully merged out of 12398 (in 140 pairings) input.
## 21478 paired-reads (in 207 unique pairings) successfully merged out of 22113 (in 395 pairings) input.
## 14683 paired-reads (in 50 unique pairings) successfully merged out of 14849 (in 83 pairings) input.
## 11651 paired-reads (in 353 unique pairings) successfully merged out of 13002 (in 890 pairings) input.
## 10672 paired-reads (in 61 unique pairings) successfully merged out of 10900 (in 96 pairings) input.
## 17814 paired-reads (in 602 unique pairings) successfully merged out of 20739 (in 1627 pairings) input.
## 11203 paired-reads (in 63 unique pairings) successfully merged out of 11506 (in 154 pairings) input.
## 173 paired-reads (in 6 unique pairings) successfully merged out of 272 (in 30 pairings) input.
## 11335 paired-reads (in 26 unique pairings) successfully merged out of 11506 (in 60 pairings) input.
## 7025 paired-reads (in 87 unique pairings) successfully merged out of 7793 (in 386 pairings) input.
## 7459 paired-reads (in 24 unique pairings) successfully merged out of 7576 (in 60 pairings) input.
## 13454 paired-reads (in 57 unique pairings) successfully merged out of 13656 (in 114 pairings) input.
## 18691 paired-reads (in 65 unique pairings) successfully merged out of 18944 (in 125 pairings) input.
## 24659 paired-reads (in 241 unique pairings) successfully merged out of 25602 (in 569 pairings) input.
## 21810 paired-reads (in 349 unique pairings) successfully merged out of 22708 (in 681 pairings) input.
## 10926 paired-reads (in 115 unique pairings) successfully merged out of 11371 (in 270 pairings) input.
## 11479 paired-reads (in 64 unique pairings) successfully merged out of 11961 (in 186 pairings) input.
## 6219 paired-reads (in 45 unique pairings) successfully merged out of 6501 (in 108 pairings) input.
## 7897 paired-reads (in 82 unique pairings) successfully merged out of 8511 (in 209 pairings) input.
## 9948 paired-reads (in 93 unique pairings) successfully merged out of 10621 (in 204 pairings) input.
## 6730 paired-reads (in 116 unique pairings) successfully merged out of 7762 (in 508 pairings) input.
## 25706 paired-reads (in 262 unique pairings) successfully merged out of 26991 (in 637 pairings) input.
## 5640 paired-reads (in 47 unique pairings) successfully merged out of 6001 (in 188 pairings) input.
## 14739 paired-reads (in 132 unique pairings) successfully merged out of 15299 (in 340 pairings) input.
## 6461 paired-reads (in 78 unique pairings) successfully merged out of 6904 (in 191 pairings) input.
## 5604 paired-reads (in 28 unique pairings) successfully merged out of 5921 (in 88 pairings) input.
## 7120 paired-reads (in 78 unique pairings) successfully merged out of 7676 (in 210 pairings) input.
## 14390 paired-reads (in 255 unique pairings) successfully merged out of 15979 (in 756 pairings) input.
## 18257 paired-reads (in 100 unique pairings) successfully merged out of 18916 (in 290 pairings) input.
## 19904 paired-reads (in 14 unique pairings) successfully merged out of 20094 (in 43 pairings) input.
## 5941 paired-reads (in 51 unique pairings) successfully merged out of 6423 (in 259 pairings) input.
## 14903 paired-reads (in 56 unique pairings) successfully merged out of 15165 (in 133 pairings) input.
## 7803 paired-reads (in 86 unique pairings) successfully merged out of 8190 (in 201 pairings) input.
## 15334 paired-reads (in 36 unique pairings) successfully merged out of 15512 (in 93 pairings) input.
## 8909 paired-reads (in 36 unique pairings) successfully merged out of 9239 (in 109 pairings) input.
## 17277 paired-reads (in 102 unique pairings) successfully merged out of 18002 (in 321 pairings) input.
## 2 paired-reads (in 1 unique pairings) successfully merged out of 2 (in 1 pairings) input.
## 10408 paired-reads (in 84 unique pairings) successfully merged out of 10779 (in 210 pairings) input.
## 4920 paired-reads (in 74 unique pairings) successfully merged out of 5400 (in 233 pairings) input.
## 6498 paired-reads (in 22 unique pairings) successfully merged out of 6684 (in 83 pairings) input.
## 8520 paired-reads (in 133 unique pairings) successfully merged out of 9354 (in 421 pairings) input.
## 8735 paired-reads (in 113 unique pairings) successfully merged out of 9660 (in 418 pairings) input.
## 7718 paired-reads (in 28 unique pairings) successfully merged out of 8070 (in 106 pairings) input.
## 10832 paired-reads (in 140 unique pairings) successfully merged out of 11541 (in 459 pairings) input.
## 13762 paired-reads (in 48 unique pairings) successfully merged out of 14076 (in 118 pairings) input.
## 12790 paired-reads (in 50 unique pairings) successfully merged out of 13084 (in 117 pairings) input.
## 11008 paired-reads (in 80 unique pairings) successfully merged out of 11920 (in 347 pairings) input.
## 4349 paired-reads (in 66 unique pairings) successfully merged out of 5087 (in 269 pairings) input.
## 5331 paired-reads (in 98 unique pairings) successfully merged out of 6127 (in 389 pairings) input.
## 9182 paired-reads (in 93 unique pairings) successfully merged out of 9978 (in 329 pairings) input.
## 6877 paired-reads (in 27 unique pairings) successfully merged out of 7070 (in 77 pairings) input.
## 12339 paired-reads (in 31 unique pairings) successfully merged out of 12602 (in 89 pairings) input.
## 12641 paired-reads (in 28 unique pairings) successfully merged out of 12812 (in 61 pairings) input.
## 6085 paired-reads (in 78 unique pairings) successfully merged out of 6797 (in 295 pairings) input.
## 4512 paired-reads (in 67 unique pairings) successfully merged out of 5335 (in 239 pairings) input.
## 5255 paired-reads (in 24 unique pairings) successfully merged out of 5499 (in 73 pairings) input.
## 5896 paired-reads (in 67 unique pairings) successfully merged out of 6572 (in 207 pairings) input.
## 8222 paired-reads (in 51 unique pairings) successfully merged out of 8621 (in 140 pairings) input.
## 10061 paired-reads (in 44 unique pairings) successfully merged out of 10304 (in 116 pairings) input.
## 12684 paired-reads (in 80 unique pairings) successfully merged out of 13159 (in 246 pairings) input.
## 19108 paired-reads (in 214 unique pairings) successfully merged out of 20206 (in 651 pairings) input.
## 17896 paired-reads (in 76 unique pairings) successfully merged out of 18520 (in 238 pairings) input.
## 149 paired-reads (in 1 unique pairings) successfully merged out of 149 (in 1 pairings) input.
## 4144 paired-reads (in 31 unique pairings) successfully merged out of 4430 (in 130 pairings) input.
## 8545 paired-reads (in 200 unique pairings) successfully merged out of 9699 (in 639 pairings) input.
## 7779 paired-reads (in 5 unique pairings) successfully merged out of 7796 (in 9 pairings) input.
## 8599 paired-reads (in 56 unique pairings) successfully merged out of 8998 (in 119 pairings) input.
## 3 paired-reads (in 1 unique pairings) successfully merged out of 3 (in 1 pairings) input.
## 0 paired-reads (in 0 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 0 paired-reads (in 0 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 27 paired-reads (in 5 unique pairings) successfully merged out of 27 (in 5 pairings) input.
## 2144 paired-reads (in 23 unique pairings) successfully merged out of 2160 (in 37 pairings) input.
## 2821 paired-reads (in 25 unique pairings) successfully merged out of 2866 (in 58 pairings) input.
## 614 paired-reads (in 10 unique pairings) successfully merged out of 617 (in 13 pairings) input.
## 799 paired-reads (in 12 unique pairings) successfully merged out of 818 (in 19 pairings) input.
## 6184 paired-reads (in 19 unique pairings) successfully merged out of 6273 (in 57 pairings) input.
## 1 paired-reads (in 1 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 0 paired-reads (in 0 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 1 paired-reads (in 1 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 0 paired-reads (in 0 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 175 paired-reads (in 1 unique pairings) successfully merged out of 175 (in 1 pairings) input.
## 6860 paired-reads (in 127 unique pairings) successfully merged out of 8177 (in 352 pairings) input.
## 6783 paired-reads (in 80 unique pairings) successfully merged out of 7303 (in 217 pairings) input.
## 766 paired-reads (in 20 unique pairings) successfully merged out of 910 (in 52 pairings) input.
## 16093 paired-reads (in 121 unique pairings) successfully merged out of 16669 (in 234 pairings) input.
## 940 paired-reads (in 21 unique pairings) successfully merged out of 1052 (in 36 pairings) input.
## 978 paired-reads (in 1 unique pairings) successfully merged out of 979 (in 2 pairings) input.
## 1337 paired-reads (in 1 unique pairings) successfully merged out of 1337 (in 1 pairings) input.
## 9979 paired-reads (in 5 unique pairings) successfully merged out of 9995 (in 8 pairings) input.
## 18351 paired-reads (in 27 unique pairings) successfully merged out of 18544 (in 90 pairings) input.
## 11311 paired-reads (in 66 unique pairings) successfully merged out of 11659 (in 163 pairings) input.
## 19757 paired-reads (in 54 unique pairings) successfully merged out of 19992 (in 111 pairings) input.
## 12565 paired-reads (in 131 unique pairings) successfully merged out of 13367 (in 360 pairings) input.
## 10561 paired-reads (in 42 unique pairings) successfully merged out of 10810 (in 90 pairings) input.
## 9992 paired-reads (in 44 unique pairings) successfully merged out of 10198 (in 113 pairings) input.
## 1696 paired-reads (in 17 unique pairings) successfully merged out of 1728 (in 36 pairings) input.
## 9599 paired-reads (in 91 unique pairings) successfully merged out of 9956 (in 187 pairings) input.
## 20079 paired-reads (in 219 unique pairings) successfully merged out of 21089 (in 633 pairings) input.
## 10400 paired-reads (in 45 unique pairings) successfully merged out of 10629 (in 122 pairings) input.
## 9792 paired-reads (in 79 unique pairings) successfully merged out of 10283 (in 236 pairings) input.
## 13361 paired-reads (in 105 unique pairings) successfully merged out of 13731 (in 256 pairings) input.
## 2838 paired-reads (in 42 unique pairings) successfully merged out of 3021 (in 107 pairings) input.
## 14857 paired-reads (in 104 unique pairings) successfully merged out of 15245 (in 212 pairings) input.
## 26398 paired-reads (in 220 unique pairings) successfully merged out of 27068 (in 422 pairings) input.
## 18135 paired-reads (in 208 unique pairings) successfully merged out of 18919 (in 545 pairings) input.
## 7228 paired-reads (in 65 unique pairings) successfully merged out of 8034 (in 344 pairings) input.
## 6811 paired-reads (in 49 unique pairings) successfully merged out of 7204 (in 171 pairings) input.
## 11137 paired-reads (in 109 unique pairings) successfully merged out of 12077 (in 367 pairings) input.
## 9854 paired-reads (in 28 unique pairings) successfully merged out of 9937 (in 56 pairings) input.
## 16743 paired-reads (in 211 unique pairings) successfully merged out of 17808 (in 565 pairings) input.
## 9242 paired-reads (in 64 unique pairings) successfully merged out of 9517 (in 162 pairings) input.
## 14436 paired-reads (in 15 unique pairings) successfully merged out of 14509 (in 33 pairings) input.
## 22697 paired-reads (in 168 unique pairings) successfully merged out of 23444 (in 431 pairings) input.
## 11685 paired-reads (in 168 unique pairings) successfully merged out of 12163 (in 372 pairings) input.
## 2637 paired-reads (in 16 unique pairings) successfully merged out of 2836 (in 48 pairings) input.
## 7470 paired-reads (in 58 unique pairings) successfully merged out of 8016 (in 228 pairings) input.
## 10857 paired-reads (in 11 unique pairings) successfully merged out of 10971 (in 30 pairings) input.
## 14527 paired-reads (in 107 unique pairings) successfully merged out of 15266 (in 328 pairings) input.
## 9450 paired-reads (in 21 unique pairings) successfully merged out of 9641 (in 58 pairings) input.
## 10693 paired-reads (in 135 unique pairings) successfully merged out of 13792 (in 355 pairings) input.
## 11180 paired-reads (in 141 unique pairings) successfully merged out of 11902 (in 364 pairings) input.
## 4622 paired-reads (in 114 unique pairings) successfully merged out of 5210 (in 307 pairings) input.
## 22213 paired-reads (in 370 unique pairings) successfully merged out of 23584 (in 783 pairings) input.
## 13987 paired-reads (in 126 unique pairings) successfully merged out of 14667 (in 300 pairings) input.
## 8393 paired-reads (in 62 unique pairings) successfully merged out of 8896 (in 244 pairings) input.
## 25267 paired-reads (in 330 unique pairings) successfully merged out of 26174 (in 726 pairings) input.
## 14312 paired-reads (in 53 unique pairings) successfully merged out of 14531 (in 123 pairings) input.
## 1373 paired-reads (in 15 unique pairings) successfully merged out of 1429 (in 38 pairings) input.
## 760 paired-reads (in 5 unique pairings) successfully merged out of 860 (in 16 pairings) input.
## 649 paired-reads (in 19 unique pairings) successfully merged out of 895 (in 93 pairings) input.
## 2387 paired-reads (in 20 unique pairings) successfully merged out of 2435 (in 32 pairings) input.
## 455 paired-reads (in 2 unique pairings) successfully merged out of 455 (in 2 pairings) input.
## 8326 paired-reads (in 47 unique pairings) successfully merged out of 8844 (in 217 pairings) input.
## 41 paired-reads (in 2 unique pairings) successfully merged out of 41 (in 2 pairings) input.
## 1 paired-reads (in 1 unique pairings) successfully merged out of 1 (in 1 pairings) input.
## 3 paired-reads (in 1 unique pairings) successfully merged out of 3 (in 1 pairings) input.
## 17203 paired-reads (in 170 unique pairings) successfully merged out of 18183 (in 400 pairings) input.
## 12666 paired-reads (in 88 unique pairings) successfully merged out of 13264 (in 251 pairings) input.
## 17553 paired-reads (in 139 unique pairings) successfully merged out of 18223 (in 386 pairings) input.
#this paper used min overlap of 10bp with "nrITS2": https://www.sciencedirect.com/science/article/pii/S0048969721055455#s0010

#I could play with minOverlap parameter to see the effects on merging, but leaving at 11 for now
# length=30L; overlap=25; mismat=0
# mergers.test <- mergePairs(head(dadaFs, n=length), head(derepFs, n=length), head(dadaRs, n=length), head(derepRs, n=length), verbose=TRUE, minOverlap = overlap, maxMismatch = mismat)
# rm(length,overlap,mismat,mergers.test)

The mergePairs(…) function returns a data.frame corresponding to each successfully merged unique sequence. The “forward” and “reverse” columns record which forward and reverse sequence contributed to that merged sequence.

Construct ASV table

We can now construct an amplicon sequence variant (ASV) table, a higher-resolution version of the OTU table produced by traditional methods.

# Assemble the sample-by-ASV count table from the merged read pairs and report its size
seqtab <- makeSequenceTable(samples = mergers)
dim(seqtab)
## [1]   246 15213
# 246 samples
# 15,213 ASVs

Post-dada2 quality control

Remove chimeras

A chimera is a single DNA sequence that originates when multiple transcripts or DNA sequences become joined together. Chimeras can be considered artifacts and can be filtered out of the data during processing.

The number of unique variants that are chimeras is higher in exact amplicon sequence variant (ASV) methods like DADA2 than they were in OTU methods, as chimeras very close to the real sequences are the most common type of chimera, and those used to be hidden by being lumped into an OTU. So some expectations based on previous OTU processing should be modified a little bit.

Robert Edgar discusses this in more detail in his uchime2 paper: https://doi.org/10.1101/074252

# Remove bimeric ASVs with the "consensus" method
# (author's note: more stringent parameter minFoldParentOverAbundance=2)
seqtab.nochim <- removeBimeraDenovo(
  unqs = seqtab,
  method = "consensus",
  multithread = TRUE,
  verbose = TRUE
)
## Identified 14259 bimeras out of 15213 input sequences.
#Identified 14249 bimeras out of 15201 input sequences.

# Count the sample names, then use them as the row labels of the chimera-free table
length(sample.names)
## [1] 246
rownames(seqtab.nochim) <- sample.names

sum(seqtab) # total reads before chimera removal
## [1] 2332765
sum(seqtab.nochim) # total reads left after chimera removal
## [1] 1928495
sum(seqtab.nochim) / sum(seqtab) # fraction of reads retained
## [1] 0.8266992
100 - (100 * (sum(seqtab.nochim) / sum(seqtab))) # percent of reads removed as chimeras (about 17)
## [1] 17.33008

The more important metric here is the fraction of reads removed as bimeras, which is <20% here, so in the range of what we see. It is normal that a much higher fraction of ASVs than reads will be removed as bimeras, because chimeras are highly diverse but usually quite rare. You will see more chimeric ASVs if you sequence deeply, but not a meaningfully higher number of chimeric reads.

If you’re seeing more than 20% of reads being chimeric, you may want to re-examine your PCR protocol in the future. Longer extension times and fewer PCR cycles are both approaches that have been shown to reduce the formation of chimeric amplicons.

Inspect distribution of sequence lengths

Looking at the distribution of sequence lengths in the non-chimeric ASVs:

# Number of non-chimeric ASVs at each sequence length
seqtab.nochim |> getSequences() |> nchar() |> table()
## 
##  57  60  63  65  67  68  74  79  81  82  89  90  95  96 100 104 106 110 111 112 
##   3   1   1   3   1   2   1   1   1   1   1   1   1   3   2   2   1   1   1   1 
## 113 115 116 117 118 121 122 123 124 126 129 132 133 135 145 146 147 148 150 155 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   2   1   1   1   2   1   1 
## 156 160 164 166 168 169 171 172 173 175 177 178 182 185 189 190 192 196 199 204 
##   1   1   1   4   1   2   1   1   1   1   1   1   1   1   1   2   2   1   2   1 
## 206 208 210 212 216 221 222 227 228 233 239 240 244 254 257 267 280 302 318 324 
##   1   1   1   1   1   2   2   1   2   4   1   1   1   1   1   1   1   1   1   1 
## 345 350 370 371 373 388 389 396 401 411 416 420 422 425 426 427 428 434 435 437 
##   3   1   2   2   1   2   2   1   1   1   2   1   2   2   6   2   3   1   2   1 
## 439 441 443 444 445 446 447 448 449 450 451 453 454 455 456 466 469 470 477 490 
##   1   1   1   1  13 630   8  10  44   4   5   1   1   1   3  68  14   1   1   1 
## 493 494 496 
##   1   1   1
seqtab.nochim |> getSequences() |> nchar() |> table() |> sum() # total ASVs
## [1] 954
seqtab.nochim |> getSequences() |> nchar() |> table() |> plot() # ASV count per sequence length

Remove ASVs with less than 100 reads total

This threshold has been used before for characterizing pollinator microbiomes (Hammer et al. 2020, 2023). Since my pollens are expected to be a lot more simple than a microbiome, I feel this threshold is quite conservative.

# Keep only the ASVs whose reads, summed over all samples, reach the minimum total.
# The threshold is applied to the column totals: colSums(x) >= 100 tests the total
# reads of an ASV, whereas colSums(x > 100) would count the samples in which the ASV
# individually exceeds 100 reads (and so would drop, e.g., an ASV with 60 + 60 reads).
# drop = FALSE keeps the result a matrix even if a single ASV survives.
min.total.reads <- 100
seqtab.nochim <- seqtab.nochim[, colSums(seqtab.nochim) >= min.total.reads, drop = FALSE]

Citations for this step: Hammer, T. J., J. C. Dickerson, W. O. McMillan, and N. Fierer. 2020. Heliconius Butterflies Host Characteristic and Phylogenetically Structured Adult-Stage Microbiomes. Applied and Environmental Microbiology 86. Hammer, T. J., J. Kueneman, M. Argueta-Guzmán, Q. S. McFrederick, Lady Grant, W. Wcislo, S. Buchmann, and B. N. Danforth. 2023. Bee breweries: The unusually fermentative, lactobacilli-dominated brood cell microbiomes of cellophane bees. Frontiers in Microbiology 14:1–16.

Remove contaminating sequences with decontam

The steps & info below are largely from this tutorial: https://benjjneb.github.io/decontam/vignettes/decontam_intro.html#necessary-ingredients

The investigation of environmental microbial communities and microbiomes has been transformed by the recent widespread adoption of culture-free high-throughput sequencing methods. In amplicon sequencing a particular genetic locus is amplified from DNA extracted from the community of interest, and then sequenced on a next-generation sequencing platform. In shotgun metagenomics, bulk DNA is extracted from the community of interest and sequenced. Both techniques provide cost-effective and culture-free characterizations of microbial communities.

However, the accuracy of these methods is limited in practice by the introduction of contaminating DNA that was not truly present in the sampled community. This contaminating DNA can come from several sources, such as the reagents used in the sequencing reaction, and can critically interfere with downstream analyses, especially in lower biomass environments. The decontam package provides simple statistical methods to identify and visualize contaminating DNA features, allowing them to be removed and a more accurate picture of sampled communities to be constructed from marker-gene and metagenomics data.

Prep phyloseq objects

#load packages
library(decontam)
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::collapse()     masks Biostrings::collapse(), IRanges::collapse()
## ✖ dplyr::combine()      masks Biobase::combine(), BiocGenerics::combine()
## ✖ purrr::compact()      masks XVector::compact()
## ✖ purrr::compose()      masks ShortRead::compose()
## ✖ dplyr::count()        masks matrixStats::count()
## ✖ dplyr::desc()         masks IRanges::desc()
## ✖ tidyr::expand()       masks S4Vectors::expand()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ dplyr::first()        masks GenomicAlignments::first(), S4Vectors::first()
## ✖ dplyr::id()           masks ShortRead::id()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ dplyr::last()         masks GenomicAlignments::last()
## ✖ ggplot2::Position()   masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()       masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()       masks S4Vectors::rename()
## ✖ lubridate::second()   masks GenomicAlignments::second(), S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice()        masks XVector::slice(), IRanges::slice()
## ✖ tibble::view()        masks ShortRead::view()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the sample metadata workbook and preview its first rows
samp.ctrls.conc <- read_excel(path = "/scratch/kls7sg/Bioinformatics/2024-09-27_MiSeq_v3/SampleConc.xlsx")
head(samp.ctrls.conc)
## # A tibble: 6 × 5
##   SampleID          SampleID_AllUnderscores Control Conc_ng.uL Note 
##   <chr>             <chr>                   <lgl>        <dbl> <chr>
## 1 ITS2_2020-6-16_H1 ITS2_2020_6_16_H1       FALSE         60.1 <NA> 
## 2 ITS2_2020-6-16_H5 ITS2_2020_6_16_H5       FALSE         53.7 <NA> 
## 3 ITS2_2020-6-16_H6 ITS2_2020_6_16_H6       FALSE         41.8 <NA> 
## 4 ITS2_2020-6-17_H2 ITS2_2020_6_17_H2       FALSE         45.7 <NA> 
## 5 ITS2_2020-6-17_H4 ITS2_2020_6_17_H4       FALSE         75.7 <NA> 
## 6 ITS2_2020-6-17_H8 ITS2_2020_6_17_H8       FALSE         59.6 <NA>
# Keep only the metadata rows whose SampleID starts with "rbcL".
# filter() and str_starts() are namespace-qualified so the call does not depend on
# which attached package currently masks those names.
samp.ctrls.conc <- samp.ctrls.conc %>%
  dplyr::filter(stringr::str_starts(SampleID, "rbcL"))

# NOTE(review): this detaches only the tidyverse meta-package; the packages it
# attached (dplyr, ggplot2, ...) remain on the search path -- confirm that is intended.
detach("package:tidyverse")

# Create phyloseq components from the sample metadata and the ASV count table
SAMP <- sample_data(samp.ctrls.conc)
sample_names(SAMP) <- sample_data(SAMP)$SampleID_AllUnderscores
# Rows of seqtab.nochim are samples. FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned.
OTU <- otu_table(seqtab.nochim, taxa_are_rows = FALSE, errorIfNULL = TRUE)
# Prefix the OTU sample names with the marker so they match the metadata names
sample_names(OTU) <- paste0("rbcL_", sample_names(OTU))

# Preview the first sample names on each side to confirm they share one format
head(sample_names(SAMP), n = 6L)
## [1] "rbcL_2020_6_16_H1" "rbcL_2020_6_16_H5" "rbcL_2020_6_16_H6"
## [4] "rbcL_2020_6_17_H2" "rbcL_2020_6_17_H4" "rbcL_2020_6_17_H8"
head(sample_names(OTU), n = 6L)
## [1] "rbcL_2020_6_16_H1" "rbcL_2020_6_16_H5" "rbcL_2020_6_16_H6"
## [4] "rbcL_2020_6_17_H2" "rbcL_2020_6_17_H4" "rbcL_2020_6_17_H8"
# Exact-equality test of the two name vectors: TRUE only if they are exactly equal, FALSE in every other case
identical(sample_names(SAMP), sample_names(OTU))
## [1] FALSE
# Position of each SAMP name within the OTU names (first match); NA marks a name with no match
match(x = sample_names(SAMP), table = sample_names(OTU))
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  NA  25  26  NA  NA  27  28  29  30  31  32  33
##  [37]  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51
##  [55]  52  53  54  55  56  57  58  59  60  61  62  63  64  65  NA  66  NA  67
##  [73]  NA  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84
##  [91]  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 101 102
## [109] 103  NA 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
## [127] 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
## [145] 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
## [163] 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
## [181] 174 175  NA  NA 176 177  NA 178 179 180 181 182 183 184 185 186 187 188
## [199]  NA 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
## [217] 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
## [235] 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
## [253] 242  NA  NA 243  NA  NA 244 245 246
# Logical vector: TRUE where a SAMP name also occurs among the OTU names (same result as %in%)
is.element(sample_names(SAMP), sample_names(OTU))
##   [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [13]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [25] FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [37]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [49]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [61]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE
##  [73] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [85]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [97]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [109]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [121]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [133]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [145]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [157]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [169]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [181]  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
## [193]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
## [205]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [217]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [229]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [241]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [253]  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE
# Restrict the sample metadata to the samples that are also present in the OTU object.
# prune_samples() takes a logical vector (TRUE = keep, one entry per sample in x)
# and the phyloseq object x to subset.
SAMP <- prune_samples(
  samples = sample_names(SAMP) %in% sample_names(OTU),
  x = SAMP
)

# Combine the count table and the sample metadata into a single phyloseq object, then show it
physeq <- phyloseq(OTU, SAMP)
physeq
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 259 taxa and 246 samples ]
## sample_data() Sample Data:       [ 246 samples by 5 sample variables ]
slotNames(physeq) # the "tax_table", "phy_tree" and "refseq" slots are empty
## [1] "otu_table" "tax_table" "sam_data"  "phy_tree"  "refseq"
# Work on a copy whose OTU-table rows are ordered by the Control flag, so the controls appear last
physeq.reord <- physeq
otu_table(physeq.reord) <- otu_table(physeq.reord)[
  order(sample_data(physeq.reord)$Control),
]

Plot read numbers for control samples and unknown samples

A quick first look at the library sizes (i.e. the number of reads) in each sample, as a function of whether that sample was a true positive sample or a negative control:

# Library size (total reads) per sample, ranked from smallest to largest
df <- as.data.frame(sample_data(physeq)) # sample_data as a ggplot-friendly data.frame
df$LibrarySize <- sample_sums(physeq) # total reads in each sample
df <- df[order(df$LibrarySize), ] # sort rows by library size
# Rank index in sort order; seq_len() yields an empty index for a 0-row table,
# whereas seq(nrow(df)) would produce c(1, 0)
df$Index <- seq_len(nrow(df))
# Read numbers of every library, colored by control vs. unknown sample
ggplot(data = df, aes(x = Index, y = LibrarySize, color = as.factor(Control))) +
  geom_point()

# Read numbers of every library, colored by library stock concentration
ggplot(data = df, aes(x = Index, y = LibrarySize, color = Conc_ng.uL)) +
  geom_point()

Identify probable contaminants

The first contaminant identification method we’ll use is the “frequency” method. In this method, the distribution of the frequency of each sequence feature as a function of the input DNA concentration is used to identify contaminants.

The second contaminant identification method is the “prevalence” method. In this method, the prevalence (presence/absence across samples) of each sequence feature in true positive samples is compared to the prevalence in negative controls to identify contaminants.

The final, “combined” method: The frequency and prevalence probabilities are combined with Fisher’s method and used to identify contaminants.

# Flag the negative controls, then score every ASV with the combined
# frequency + prevalence method
sample_data(physeq.reord)$is.neg <- sample_data(physeq.reord)$Control == "TRUE"
contamdf.comb <- isContaminant(
  physeq.reord,
  method = "combined",
  conc = "Conc_ng.uL", # sample_data column holding the DNA concentration
  neg = "is.neg"       # sample_data column marking the negative controls
)
## Warning in .is_contaminant(seqtab, conc = conc, neg = neg, method = method, :
## Removed 14 samples with zero total counts (or frequency).
## Warning in .is_contaminant(seqtab, conc = conc, neg = neg, method = method, :
## Removed 14 samples with zero total counts (or frequency).
table(contamdf.comb[["contaminant"]]) # ASVs not flagged (FALSE) vs flagged (TRUE) as contaminants
## 
## FALSE  TRUE 
##   251     8
which(contamdf.comb[["contaminant"]]) # row positions of the flagged ASVs
## [1]  2  9 13 18 25 26 43 50
# Convert counts to presence (1) / absence (0), then split into negative controls and true samples
ps.pa <- transform_sample_counts(physeq.reord, function(abund) (abund > 0) * 1)
ps.pa.neg <- prune_samples(sample_data(ps.pa)$Control == "TRUE", ps.pa)
ps.pa.pos <- prune_samples(sample_data(ps.pa)$Control == "FALSE", ps.pa)
# Per-ASV prevalence in true samples and in negative controls, alongside the contaminant call
df.pa <- data.frame(
  pa.pos = taxa_sums(ps.pa.pos),
  pa.neg = taxa_sums(ps.pa.neg),
  contaminant = contamdf.comb$contaminant
)
# Number of negative controls vs. number of true samples in which each ASV was observed
ggplot(data = df.pa, aes(x = pa.neg, y = pa.pos, color = contaminant)) +
  geom_point() +
  xlab("Prevalence (Negative Controls)") +
  ylab("Prevalence (True Samples)")

#Samples seem to split pretty cleanly into a branch that shows up mostly in positive samples, and another that shows up mostly in negative controls, and the contaminant assignment (at default probability threshold) has done a good job of identifying those mostly in negative controls.

Remove contaminating sequences

# Remove the ASVs flagged as contaminants and extract the cleaned counts as a plain matrix
physeq.reord.noncontam <- prune_taxa(!contamdf.comb$contaminant, physeq.reord) # keep only non-contaminant taxa
# Convert the S4 otu_table with as(); assigning class(x) <- "matrix" to an S4 object
# raises a "result will no longer be an S4 object" warning
seqtab.nochim.nocontam <- as(otu_table(physeq.reord.noncontam), "matrix") # plain matrix, easier to manipulate and export
## Warning in class(seqtab.nochim.nocontam) <- "matrix": Setting class(x) to
## "matrix" sets attribute to NULL; result will no longer be an S4 object
# Print the sample names from character 6 onward (up to character 100), i.e. without the
# leading "rbcL_" designation; the result is only displayed, not stored
substr(rownames(seqtab.nochim.nocontam), start = 6, stop = 100)
##   [1] "2020_6_16_H1"                       
##   [2] "2020_6_16_H5"                       
##   [3] "2020_6_16_H6"                       
##   [4] "2020_6_17_H2"                       
##   [5] "2020_6_17_H4"                       
##   [6] "2020_6_17_H8"                       
##   [7] "2020_6_18_H3"                       
##   [8] "2020_6_18_H7"                       
##   [9] "2020_6_18_H9"                       
##  [10] "2020_6_3_H1"                        
##  [11] "2020_6_3_H5"                        
##  [12] "2020_6_3_H6"                        
##  [13] "2020_6_30_H1"                       
##  [14] "2020_6_30_H5"                       
##  [15] "2020_6_30_H6"                       
##  [16] "2020_6_4_H2"                        
##  [17] "2020_6_4_H4"                        
##  [18] "2020_6_4_H8"                        
##  [19] "2020_6_5_H3"                        
##  [20] "2020_6_5_H7"                        
##  [21] "2020_6_5_H9"                        
##  [22] "2020_7_1_H2"                        
##  [23] "2020_7_1_H4"                        
##  [24] "2020_7_1_H8"                        
##  [25] "2020_7_14_H5"                       
##  [26] "2020_7_14_H6"                       
##  [27] "2020_7_15_H8"                       
##  [28] "2020_7_16_H3"                       
##  [29] "2020_7_16_H7"                       
##  [30] "2020_7_16_H9"                       
##  [31] "2020_7_2_H3"                        
##  [32] "2020_7_2_H7"                        
##  [33] "2020_7_2_H9"                        
##  [34] "2021_6_13_H1"                       
##  [35] "2021_6_13_H3"                       
##  [36] "2021_6_14_H11"                      
##  [37] "2021_6_14_H6"                       
##  [38] "2021_6_14_H7"                       
##  [39] "2021_6_15_H8"                       
##  [40] "2021_6_21_H10"                      
##  [41] "2021_6_21_H12"                      
##  [42] "2021_6_21_H9"                       
##  [43] "2021_6_27_H21"                      
##  [44] "2021_6_27_H22"                      
##  [45] "2021_6_27_H27"                      
##  [46] "2021_6_28_H25"                      
##  [47] "2021_6_28_H26"                      
##  [48] "2021_6_28_H28"                      
##  [49] "2021_6_29_H17"                      
##  [50] "2021_6_29_H23"                      
##  [51] "2021_6_29_H24"                      
##  [52] "2021_6_4_H21"                       
##  [53] "2021_6_4_H22"                       
##  [54] "2021_6_4_H27"                       
##  [55] "2021_6_5_H18"                       
##  [56] "2021_6_5_H25"                       
##  [57] "2021_6_5_H26"                       
##  [58] "2021_6_6_H17"                       
##  [59] "2021_6_6_H24"                       
##  [60] "2021_6_7_H23"                       
##  [61] "2021_7_14_H10"                      
##  [62] "2021_7_14_H12"                      
##  [63] "2021_7_20_H27"                      
##  [64] "2021_7_21_H25"                      
##  [65] "2021_7_21_H26"                      
##  [66] "2021_7_6_H11"                       
##  [67] "2021_7_6_H6"                        
##  [68] "2021_7_7_H8"                        
##  [69] "2021_7_8_H3"                        
##  [70] "2023_6_12_H3"                       
##  [71] "2023_6_12_H5"                       
##  [72] "2023_6_12_H7"                       
##  [73] "2023_6_13_H6"                       
##  [74] "2023_6_13_H8"                       
##  [75] "2023_6_13_H9"                       
##  [76] "2023_6_14_H3"                       
##  [77] "2023_6_14_H7"                       
##  [78] "2023_6_14_H9"                       
##  [79] "2023_6_16_H5"                       
##  [80] "2023_6_24_H6"                       
##  [81] "2023_6_24_H8"                       
##  [82] "2023_6_25_H2"                       
##  [83] "2023_6_25_H4"                       
##  [84] "2023_6_26_H1"                       
##  [85] "2023_6_26_H7"                       
##  [86] "2023_6_27_H3"                       
##  [87] "2023_6_27_H5"                       
##  [88] "2023_6_8_H1"                        
##  [89] "2023_6_8_H2"                        
##  [90] "2023_6_8_H4"                        
##  [91] "2023_6_9_H2"                        
##  [92] "2023_6_9_H4"                        
##  [93] "2023_7_15_H6"                       
##  [94] "2023_7_16_H4"                       
##  [95] "2023_7_17_H1"                       
##  [96] "2023_7_18_H3"                       
##  [97] "2023_7_18_H7"                       
##  [98] "2023_7_29_H5"                       
##  [99] "2023_7_29_H7"                       
## [100] "2023_7_30_H8"                       
## [101] "2023_7_30_H9"                       
## [102] "2023_7_5_H1"                        
## [103] "2023_7_5_H2"                        
## [104] "2023_7_6_H6"                        
## [105] "2023_7_6_H8"                        
## [106] "2023_7_6_H9"                        
## [107] "2023_7_8_H3"                        
## [108] "2023_7_8_H5"                        
## [109] "2023_7_8_H7"                        
## [110] "2023_8_4_H2"                        
## [111] "2023_8_4_H5"                        
## [112] "2023_8_4_H6"                        
## [113] "2023_8_4_H7"                        
## [114] "2023_8_4_H8"                        
## [115] "2023_8_4_H9"                        
## [116] "Ba001"                              
## [117] "Ba002"                              
## [118] "Ba003"                              
## [119] "Bb001"                              
## [120] "Bb002"                              
## [121] "Bb003"                              
## [122] "Bb004"                              
## [123] "Bb005"                              
## [124] "Bb007"                              
## [125] "Bb008"                              
## [126] "Bb009"                              
## [127] "Bb010"                              
## [128] "Bb011"                              
## [129] "Bb012"                              
## [130] "Bb013"                              
## [131] "Bb014"                              
## [132] "Bb015"                              
## [133] "Bb016"                              
## [134] "Bb017"                              
## [135] "Bb018"                              
## [136] "Bb019"                              
## [137] "Bb020"                              
## [138] "Bb021"                              
## [139] "Bb022"                              
## [140] "Bb023"                              
## [141] "Bb024"                              
## [142] "Bb025"                              
## [143] "Bf001"                              
## [144] "Bf002"                              
## [145] "Bf003"                              
## [146] "Bf004"                              
## [147] "Bg001"                              
## [148] "Bg002"                              
## [149] "Bg003"                              
## [150] "Bg004"                              
## [151] "Bg005"                              
## [152] "Bg006"                              
## [153] "Bg007"                              
## [154] "Bg008"                              
## [155] "Bg009"                              
## [156] "Bg010"                              
## [157] "Bg011"                              
## [158] "Bg012"                              
## [159] "Bg013"                              
## [160] "Bg014"                              
## [161] "Bg015"                              
## [162] "Bg016"                              
## [163] "Bg017"                              
## [164] "Bg018"                              
## [165] "Bg019"                              
## [166] "Bi001"                              
## [167] "Bi002"                              
## [168] "Bi003"                              
## [169] "Bi004"                              
## [170] "Bi005"                              
## [171] "Bi006"                              
## [172] "Bi007"                              
## [173] "CKC0001"                            
## [174] "ESE0004"                            
## [175] "KLS0007"                            
## [176] "KLS0027"                            
## [177] "KLS0044"                            
## [178] "KLS0045"                            
## [179] "KLS0052"                            
## [180] "KLS0054"                            
## [181] "KLS0055"                            
## [182] "KLS0071"                            
## [183] "KLS0095"                            
## [184] "KLS0096"                            
## [185] "KLS0105"                            
## [186] "KLS0106"                            
## [187] "KLS0119"                            
## [188] "KLS0134"                            
## [189] "KLS0135"                            
## [190] "KLS0136"                            
## [191] "KLS0137"                            
## [192] "KLS0138"                            
## [193] "KLS0139"                            
## [194] "KLS0150"                            
## [195] "KLS0153"                            
## [196] "KLS0155"                            
## [197] "KLS0156"                            
## [198] "KLS0159"                            
## [199] "KLS0163"                            
## [200] "KLS0165"                            
## [201] "KLS0167"                            
## [202] "KLS0168"                            
## [203] "KLS0169"                            
## [204] "KLS0170"                            
## [205] "KLS0200"                            
## [206] "KLS0201"                            
## [207] "KLS0205"                            
## [208] "KLS0209"                            
## [209] "KLS0221"                            
## [210] "KLS0224"                            
## [211] "KLS0225"                            
## [212] "KLS0227"                            
## [213] "KLS0241"                            
## [214] "KLS0244"                            
## [215] "KLS0246"                            
## [216] "KLS0248"                            
## [217] "KLS0253"                            
## [218] "KLS0254"                            
## [219] "KLS0256"                            
## [220] "KLS0259"                            
## [221] "KLS0263"                            
## [222] "KLS0266"                            
## [223] "KLS0272"                            
## [224] "SCA0009"                            
## [225] "SCA0010"                            
## [226] "SCA0013"                            
## [227] "ext_neg_ctrl_20230909"              
## [228] "ext_neg_ctrl_20231007"              
## [229] "ext_neg_ctrl_20231008"              
## [230] "ext_neg_ctrl_2024220A"              
## [231] "ext_neg_ctrl_2024220B"              
## [232] "ext_neg_ctrl_2024221A"              
## [233] "ext_neg_ctrl_2024221B"              
## [234] "ext_neg_ctrl_2024222A"              
## [235] "ext_neg_ctrl_2024222B"              
## [236] "ext_neg_ctrl_2024312A"              
## [237] "ext_neg_ctrl_2024312B"              
## [238] "ext_neg_ctrl_2024314A"              
## [239] "ext_neg_ctrl_2024314B"              
## [240] "ext_neg_ctrl_2024319"               
## [241] "pcr_rbcL_neg_crtl_20240417"         
## [242] "pcr_rbcL_neg_ctrl_20240409"         
## [243] "pcr_rbcL_neg_ctrl_20240418A"        
## [244] "pcr_rbcL_neg_ctrl_20240418B"        
## [245] "pcr_rbcL_neg_ctrl_20240523"         
## [246] "rbcL_pcr_neg_ctrl_20231021_20231119"
# Compare sample order between nocontam and nochim. The nocontam row names are
# compared with their leading "rbcL_" removed (nochim names have no such prefix);
# an anchored sub() strips exactly that prefix instead of relying on fixed
# character positions.
identical(sub("^rbcL_", "", rownames(seqtab.nochim.nocontam)), rownames(seqtab.nochim)) #they are not in the same order, but this is expected because we had previously reordered nocontam according to total reads
## [1] FALSE
# Position of each nocontam sample (with its leading "rbcL_" removed) within
# the row names of nochim.
match(sub("^rbcL_", "", rownames(seqtab.nochim.nocontam)), rownames(seqtab.nochim)) #returns a vector of the positions of (first) matches of its first argument in its second
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 189 190 191 192 193 194
## [181] 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
## [199] 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
## [217] 231 232 233 234 235 236 237 244 245 246 175 176 177 178 179 180 181 182
## [235] 183 184 185 186 187 188 238 239 240 241 242 243
# Target row order for nocontam: the sample names of nochim, in nochim's order,
# with "rbcL_" pasted in front so they match nocontam's row names.
index <- paste0("rbcL_", rownames(seqtab.nochim))

# Subsetting by name would silently drop any nocontam sample that is absent
# from the index, so confirm both tables hold exactly the same samples first.
stopifnot(setequal(index, rownames(seqtab.nochim.nocontam)))

# Reorder nocontam to follow nochim; drop = FALSE keeps the two-dimensional
# shape even if only one sample remains.
seqtab.nochim.nocontam <- seqtab.nochim.nocontam[index, , drop = FALSE]

identical(rownames(seqtab.nochim.nocontam), index) # true! they match exactly
## [1] TRUE

Track reads through the pipeline

We now inspect the number of reads that made it through each step in the pipeline to verify everything worked as expected.

# Sanity check: how many samples are present at each stage of the pipeline.
# Row counts are taken with nrow() so they do not depend on the number of
# columns or on computing row sums first.
head(out)
##              reads.in reads.out
## 2020_6_16_H1    16617      9241
## 2020_6_16_H5     9459      5929
## 2020_6_16_H6     1296       850
## 2020_6_17_H2     6917      4559
## 2020_6_17_H4     3027      2027
## 2020_6_17_H8    24212     15437
nrow(out)
## [1] 258
length(dadaFs)
## [1] 246
length(dadaRs)
## [1] 246
length(mergers)
## [1] 246
nrow(seqtab.nochim)
## [1] 246
nrow(seqtab.nochim.nocontam)
## [1] 246
length(sample.names)
## [1] 246
# Sum of the values returned by getUniques(x); used below as the read count of
# one sample at one pipeline stage.
getN <- function(x) {
  sum(getUniques(x))
}
# Assemble a per-sample table of read counts after each pipeline step.
# cbind() joins its inputs by position, not by name, so the two sequence tables
# must list the samples in the same order (nocontam names carry an extra
# "rbcL_" prefix); stop early if that no longer holds.
stopifnot(
  identical(
    sub("^rbcL_", "", rownames(seqtab.nochim.nocontam)),
    rownames(seqtab.nochim)
  )
)

track <- cbind(
  out[names(derepFs), ], # i only want the samples from "out" which appear in "derepFs" (but in the original tutorial code, you would just call for "out" here)
  vapply(dadaFs, getN, numeric(1)), # If processing a single sample, replace with getN(dadaFs)
  vapply(dadaRs, getN, numeric(1)), # vapply() guarantees one number per sample
  vapply(mergers, getN, numeric(1)),
  rowSums(seqtab.nochim),
  rowSums(seqtab.nochim.nocontam)
)

# The first two names label the two columns taken from "out".
colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim", "nocontam")
rownames(track) <- sample.names
track
##                                     input filtered denoisedF denoisedR merged
## 2020_6_16_H1                        16617     9241      9210      9190   8965
## 2020_6_16_H5                         9459     5929      5806      5833   4858
## 2020_6_16_H6                         1296      850       842       831    761
## 2020_6_17_H2                         6917     4559      4480      4452   2982
## 2020_6_17_H4                         3027     2027      1988      1991   1830
## 2020_6_17_H8                        24212    15437     15362     15357  14791
## 2020_6_18_H3                         6564     4021      3931      3977   3247
## 2020_6_18_H7                        12593     8636      8594      8594   8339
## 2020_6_18_H9                         7250     4778      4769      4736   4320
## 2020_6_3_H1                         23986    15258     15182     15112  13957
## 2020_6_3_H5                         28537    17366     17225     17210  15540
## 2020_6_3_H6                         29641    17405     17308     17342  16216
## 2020_6_30_H1                        18481    12116     12070     12040  11003
## 2020_6_30_H5                        18531    12313     12271     12244  11620
## 2020_6_30_H6                        18275    12755     12710     12692  12018
## 2020_6_4_H2                          1402      851       836       832    567
## 2020_6_4_H4                           556      350       324       342    284
## 2020_6_4_H8                         13252     8162      8027      8033   7061
## 2020_6_5_H3                         25128    16183     16101     16054  15185
## 2020_6_5_H7                         21045    13260     13165     13190  12306
## 2020_6_5_H9                         19654    12153     12098     12087  11510
## 2020_7_1_H2                         19567    13295     13205     13151  11719
## 2020_7_1_H4                             4        2         1         1      0
## 2020_7_1_H8                         19719    13616     13542     13549  12827
## 2020_7_14_H5                           18        1         1         1      1
## 2020_7_14_H6                            1        1         1         1      0
## 2020_7_15_H8                        14331     8785      8768      8745   8601
## 2020_7_16_H3                        54296    36464     36325     36264  35105
## 2020_7_16_H7                        12751     8836      8797      8765   8555
## 2020_7_16_H9                           29        1         1         1      0
## 2020_7_2_H3                         10763     7246      7220      7209   6958
## 2020_7_2_H7                         14383     9836      9772      9776   8993
## 2020_7_2_H9                         20528    14023     13977     13962  13210
## 2021_6_13_H1                        21810    14260     14231     14164  13769
## 2021_6_13_H3                        17070    11753     11640     11648  10503
## 2021_6_14_H11                       18063    12176     12122     12103  11736
## 2021_6_14_H6                        17750    11728     11618     11618  10687
## 2021_6_14_H7                        15547    10651     10625     10602  10452
## 2021_6_15_H8                        18595    12915     12890     12832  12426
## 2021_6_21_H10                       23512    16187     16141     16075  15566
## 2021_6_21_H12                       25343    17032     16983     16933  16581
## 2021_6_21_H9                        21263    13821     13701     13756  13193
## 2021_6_27_H21                       23331    15729     15602     15655  15055
## 2021_6_27_H22                       27932    19232     19227     19192  19176
## 2021_6_27_H27                       18354    12550     12441     12400  11605
## 2021_6_28_H25                       14755    10169     10046     10083   9643
## 2021_6_28_H26                        9038     6196      6168      6128   5787
## 2021_6_28_H28                       14931    10332     10299     10225   9722
## 2021_6_29_H17                       14135     9104      9069      9057   8741
## 2021_6_29_H23                       17111    11907     11798     11823  11174
## 2021_6_29_H24                        1100      715       707       696    664
## 2021_6_4_H21                        17980    11218     11155     11132  10227
## 2021_6_4_H22                         8144     4717      4687      4646   4340
## 2021_6_4_H27                         7028     4547      4528      4502   4188
## 2021_6_5_H18                         1897     1152      1146      1130    920
## 2021_6_5_H25                         4496     2834      2760      2759   1828
## 2021_6_5_H26                         5364     3400      3347      3326   2883
## 2021_6_6_H17                        11941     7362      7282      7254   6788
## 2021_6_6_H24                        18401    11939     11864     11838  10565
## 2021_6_7_H23                        17153    10457     10411     10354   9631
## 2021_7_14_H10                       32736    22653     22639     22627  22525
## 2021_7_14_H12                           7        4         3         3      0
## 2021_7_20_H27                       14261     9282      9227      9219   8856
## 2021_7_21_H25                       17648    12630     12616     12582  12409
## 2021_7_21_H26                           3        1         1         1      0
## 2021_7_6_H11                        21218    13905     13802     13849  13142
## 2021_7_6_H6                         20087    13693     13489     13552  12548
## 2021_7_7_H8                         23042    15342     15321     15302  15258
## 2021_7_8_H3                         16549    11288     11239     11217  10998
## 2023_6_12_H3                        10325     6412      6360      6365   6096
## 2023_6_12_H5                        20681    14325     14287     14269  13614
## 2023_6_12_H7                        13867     8964      8927      8908   8521
## 2023_6_13_H6                        20034    12999     12946     12929  12552
## 2023_6_13_H8                        12403     8254      8226      8216   7644
## 2023_6_13_H9                         5140     3377      3353      3345   3164
## 2023_6_14_H3                        21637    15274     15240     15193  14940
## 2023_6_14_H7                        22097    14310     14241     14231  13267
## 2023_6_14_H9                        18343    12646     12610     12584  12100
## 2023_6_16_H5                        17693    11967     11885     11923  11626
## 2023_6_24_H6                        13200     8763      8550      8518   7464
## 2023_6_24_H8                        14775     9782      9689      9665   8716
## 2023_6_25_H2                        20061    13012     12820     12801  11406
## 2023_6_25_H4                         8140     5350      5200      5203   4293
## 2023_6_26_H1                         8582     5962      5851      5857   5019
## 2023_6_26_H7                        15559    10330     10091     10127   8600
## 2023_6_27_H3                         9104     6278      6116      6109   5413
## 2023_6_27_H5                        17235    11398     11094     11122   9709
## 2023_6_8_H1                         14495     9094      8973      8990   8514
## 2023_6_8_H2                         14863    10848     10825     10814  10661
## 2023_6_8_H4                         19094    11980     11925     11897  10641
## 2023_6_9_H2                         34007    23277     23086     23099  21511
## 2023_6_9_H4                         10947     7104      6978      6966   5749
## 2023_7_15_H6                        26987    18724     18598     18581  17494
## 2023_7_16_H4                        18816    13047     12962     12981  12124
## 2023_7_17_H1                        31619    21557     21417     21344  20032
## 2023_7_18_H3                        10733     7624      7529      7454   6429
## 2023_7_18_H7                        12607     8641      8585      8555   8191
## 2023_7_29_H5                        12607     8843      8807      8801   8426
## 2023_7_29_H7                        27811    19278     19147     19176  18638
## 2023_7_30_H8                        12062     8704      8695      8662   8604
## 2023_7_30_H9                         6304     4403      4353      4360   4010
## 2023_7_5_H1                          9600     6719      6676      6674   6483
## 2023_7_5_H2                          5936     4171      4149      4142   3999
## 2023_7_6_H6                         16841    11259     11183     11189  10538
## 2023_7_6_H8                         29393    19324     19088     19045  18240
## 2023_7_6_H9                         31609    22375     22157     22151  21192
## 2023_7_8_H3                         10194     7103      7076      7061   6815
## 2023_7_8_H5                         17601    12450     12430     12415  12219
## 2023_7_8_H7                         33206    22286     22199     22190  21478
## 2023_8_4_H2                         22703    15351     14972     14891  14683
## 2023_8_4_H5                         19081    13489     13113     13162  11651
## 2023_8_4_H6                         17531    11447     11066     10976  10672
## 2023_8_4_H7                         31660    21306     20936     20932  17814
## 2023_8_4_H8                         17954    11628     11589     11536  11203
## 2023_8_4_H9                           524      344       309       291    173
## Ba001                               21111    11589     11547     11548  11335
## Ba002                               12744     8068      7957      7889   7025
## Ba003                               11874     7634      7604      7602   7459
## Bb001                               21681    13734     13698     13681  13454
## Bb002                               29960    19049     19013     18970  18691
## Bb003                               38886    25904     25777     25716  24659
## Bb004                               35805    23070     22899     22862  21810
## Bb005                               18006    11519     11436     11451  10926
## Bb007                               19551    12125     12050     12031  11479
## Bb008                               10374     6567      6518      6546   6219
## Bb009                               13681     8599      8545      8559   7897
## Bb010                               17683    10777     10693     10698   9948
## Bb011                               12934     8051      7954      7846   6730
## Bb012                               42681    27231     27095     27120  25706
## Bb013                               11694     6172      6087      6070   5640
## Bb014                               25104    15434     15360     15361  14739
## Bb015                               11070     7061      6998      6950   6461
## Bb016                                9634     6078      5987      6002   5604
## Bb017                               12577     7797      7765      7701   7120
## Bb018                               25614    16225     16097     16092  14390
## Bb019                               31221    19040     18970     18977  18257
## Bb020                               31931    20153     20124     20118  19904
## Bb021                               10781     6625      6530      6509   5941
## Bb022                               23477    15270     15257     15176  14903
## Bb023                               12672     8291      8241      8232   7803
## Bb024                               22426    15599     15567     15539  15334
## Bb025                               16294     9402      9355      9272   8909
## Bf001                               26481    18214     18142     18069  17277
## Bf002                                  11        4         3         2      2
## Bf003                               17101    10825     10810     10791  10408
## Bf004                                8560     5494      5444      5443   4920
## Bg001                               11540     6774      6739      6712   6498
## Bg002                               15770     9593      9448      9485   8520
## Bg003                               15898     9878      9781      9748   8735
## Bg004                               13830     8208      8174      8095   7718
## Bg005                               17850    11754     11652     11639  10832
## Bg006                               26159    14258     14174     14148  13762
## Bg007                               20463    13163     13130     13113  12790
## Bg008                               20146    12164     12036     12033  11008
## Bg009                               10615     5238      5170      5141   4349
## Bg010                               10918     6285      6201      6202   5331
## Bg011                               16326    10184     10083     10063   9182
## Bg012                               12514     7157      7132      7089   6877
## Bg013                               21541    12665     12641     12624  12339
## Bg014                               18821    12895     12866     12835  12641
## Bg015                               11683     6987      6880      6899   6085
## Bg016                                8777     5489      5435      5383   4512
## Bg017                                9272     5605      5550      5539   5255
## Bg018                               10971     6740      6707      6602   5896
## Bg019                               13995     8707      8675      8648   8222
## Bi001                               17228    10410     10360     10338  10061
## Bi002                               21579    13359     13263     13248  12684
## Bi003                               31512    20553     20368     20383  19108
## Bi004                               31006    18623     18578     18561  17896
## Bi005                                 231      149       149       149    149
## Bi006                                7576     4554      4485      4491   4144
## Bi007                               15583     9976      9803      9857   8545
## CKC0001                             14879     7833      7821      7806   7779
## ESE0004                             14474     9116      9071      9032   8599
## ext_neg_ctrl_20230909                   7        3         3         3      3
## ext_neg_ctrl_20231007                   6        3         1         1      0
## ext_neg_ctrl_20231008                  12        1         1         1      0
## ext_neg_ctrl_2024220A                 122       37        27        29     27
## ext_neg_ctrl_2024220B                3295     2165      2165      2160   2144
## ext_neg_ctrl_2024221A                4432     2878      2876      2866   2821
## ext_neg_ctrl_2024221B                 965      622       618       618    614
## ext_neg_ctrl_2024222A                1298      823       823       818    799
## ext_neg_ctrl_2024222B                9567     6313      6306      6278   6184
## ext_neg_ctrl_2024312A                   1        1         1         1      1
## ext_neg_ctrl_2024312B                  19        6         4         1      0
## ext_neg_ctrl_2024314A                   8        1         1         1      1
## ext_neg_ctrl_2024314B                   6        1         1         1      0
## ext_neg_ctrl_2024319                  321      180       179       175    175
## KLS0007                             14456     8407      8298      8272   6860
## KLS0027                             12663     7425      7348      7374   6783
## KLS0044                              1683      941       927       922    766
## KLS0045                             30890    16853     16780     16733  16093
## KLS0052                              1867     1057      1057      1052    940
## KLS0054                              1859      991       984       981    978
## KLS0055                              2718     1345      1343      1337   1337
## KLS0071                             16589    10060     10036     10009   9979
## KLS0095                             25992    18665     18599     18598  18351
## KLS0096                             18985    11780     11767     11666  11311
## KLS0105                             30704    20059     20023     20025  19757
## KLS0106                             21639    13590     13511     13428  12565
## KLS0119                             18103    10904     10886     10821  10561
## KLS0134                             14814    10331     10273     10244   9992
## KLS0135                              2499     1762      1743      1744   1696
## KLS0136                             14868    10072     10028      9985   9599
## KLS0137                             35079    21450     21279     21251  20079
## KLS0138                             15471    10763     10691     10689  10400
## KLS0139                             15646    10490     10352     10411   9792
## KLS0150                             20779    13943     13870     13799  13361
## KLS0153                              4518     3156      3096      3071   2838
## KLS0155                             24698    15382     15337     15287  14857
## KLS0156                             38485    27268     27185     27143  26398
## KLS0159                             29323    19126     19041     18995  18135
## KLS0163                             12985     8259      8140      8140   7228
## KLS0165                             11827     7382      7292      7288   6811
## KLS0167                             19510    12356     12174     12254  11137
## KLS0168                             14958     9986      9974      9946   9854
## KLS0169                             27471    18064     17958     17904  16743
## KLS0170                             14696     9654      9601      9563   9242
## KLS0200                             22264    14566     14546     14519  14436
## KLS0201                             37844    23716     23574     23573  22697
## KLS0205                             18576    12367     12237     12283  11685
## KLS0209                              4309     2911      2863      2881   2637
## KLS0221                             13628     8200      8124      8082   7470
## KLS0224                             17970    11029     11014     10979  10857
## KLS0225                             23809    15454     15374     15340  14527
## KLS0227                             14436     9704      9677      9664   9450
## KLS0241                             23414    13922     13877     13831  10693
## KLS0244                             18600    12161     12043     12004  11180
## KLS0246                              8419     5407      5325      5285   4622
## KLS0248                             38123    23791     23723     23639  22213
## KLS0253                             23821    14833     14730     14767  13987
## KLS0254                             14063     9069      9000      8960   8393
## KLS0256                             40058    26403     26269     26300  25267
## KLS0259                             20424    14649     14583     14590  14312
## KLS0263                              2180     1481      1446      1449   1373
## KLS0266                              1355      896       880       870    760
## KLS0272                              1546      977       948       920    649
## pcr_rbcL_neg_crtl_20240417           3607     2444      2442      2435   2387
## pcr_rbcL_neg_ctrl_20240409            739      462       456       455    455
## pcr_rbcL_neg_ctrl_20240418A         13703     8900      8877      8865   8326
## pcr_rbcL_neg_ctrl_20240418B            65       43        41        42     41
## pcr_rbcL_neg_ctrl_20240523              8        1         1         1      1
## rbcL_pcr_neg_ctrl_20231021_20231119     4        3         3         3      3
## SCA0009                             29024    18374     18307     18239  17203
## SCA0010                             25298    13370     13327     13300  12666
## SCA0013                             28147    18429     18350     18294  17553
##                                     nonchim nocontam
## 2020_6_16_H1                           8279     8177
## 2020_6_16_H5                           2607     2483
## 2020_6_16_H6                            624      624
## 2020_6_17_H2                           1457     1401
## 2020_6_17_H4                           1552      565
## 2020_6_17_H8                          12591    10665
## 2020_6_18_H3                           1815     1378
## 2020_6_18_H7                           6681     6286
## 2020_6_18_H9                           3374     3374
## 2020_6_3_H1                            9942     9942
## 2020_6_3_H5                            8789     8789
## 2020_6_3_H6                           10650    10650
## 2020_6_30_H1                           7926     6732
## 2020_6_30_H5                           7195     5406
## 2020_6_30_H6                           8380     8361
## 2020_6_4_H2                             385      376
## 2020_6_4_H4                             128      128
## 2020_6_4_H8                            2555     2555
## 2020_6_5_H3                           11929    11898
## 2020_6_5_H7                            8294     8167
## 2020_6_5_H9                            7920     7904
## 2020_7_1_H2                            8040     5782
## 2020_7_1_H4                               0        0
## 2020_7_1_H8                            9310     9310
## 2020_7_14_H5                              0        0
## 2020_7_14_H6                              0        0
## 2020_7_15_H8                           6488     6455
## 2020_7_16_H3                          27551     8157
## 2020_7_16_H7                           7549     5478
## 2020_7_16_H9                              0        0
## 2020_7_2_H3                            5700     4849
## 2020_7_2_H7                            6642     4713
## 2020_7_2_H9                            9961     7286
## 2021_6_13_H1                          10348    10348
## 2021_6_13_H3                           6680     6602
## 2021_6_14_H11                         10904    10901
## 2021_6_14_H6                           7844     7350
## 2021_6_14_H7                           9317     9317
## 2021_6_15_H8                           9964     9302
## 2021_6_21_H10                         11744     8315
## 2021_6_21_H12                         13511    12582
## 2021_6_21_H9                          10439     8404
## 2021_6_27_H21                          9185     5980
## 2021_6_27_H22                         19164    19164
## 2021_6_27_H27                          6141     5137
## 2021_6_28_H25                          6965     6695
## 2021_6_28_H26                          3640      802
## 2021_6_28_H28                          6997     3915
## 2021_6_29_H17                          7297     7297
## 2021_6_29_H23                          7563     5425
## 2021_6_29_H24                           617      582
## 2021_6_4_H21                           7094     6994
## 2021_6_4_H22                           2999     2731
## 2021_6_4_H27                           3177     3128
## 2021_6_5_H18                            449      362
## 2021_6_5_H25                            774      725
## 2021_6_5_H26                           1760     1601
## 2021_6_6_H17                           4725     3650
## 2021_6_6_H24                           7054     6076
## 2021_6_7_H23                           7382     6575
## 2021_7_14_H10                         21714    11236
## 2021_7_14_H12                             0        0
## 2021_7_20_H27                          8309     6414
## 2021_7_21_H25                         11212    11210
## 2021_7_21_H26                             0        0
## 2021_7_6_H11                           9544     8894
## 2021_7_6_H6                            8352     5715
## 2021_7_7_H8                           14781    14781
## 2021_7_8_H3                            9647     9647
## 2023_6_12_H3                           5634     5613
## 2023_6_12_H5                          10982    10976
## 2023_6_12_H7                           6500     6500
## 2023_6_13_H6                           9514     7280
## 2023_6_13_H8                           5040     4468
## 2023_6_13_H9                           2599     2599
## 2023_6_14_H3                          10736    10693
## 2023_6_14_H7                           8631     8560
## 2023_6_14_H9                           8873     8860
## 2023_6_16_H5                           9508     5727
## 2023_6_24_H6                           4954     1172
## 2023_6_24_H8                           5793     1864
## 2023_6_25_H2                           7663     4178
## 2023_6_25_H4                           2442      878
## 2023_6_26_H1                           2779      874
## 2023_6_26_H7                           5443     2535
## 2023_6_27_H3                           3938     1914
## 2023_6_27_H5                           6623     4960
## 2023_6_8_H1                            6601     6601
## 2023_6_8_H2                            8098     8098
## 2023_6_8_H4                            8210     8210
## 2023_6_9_H2                           14406     8875
## 2023_6_9_H4                            3123     3123
## 2023_7_15_H6                          13934     2963
## 2023_7_16_H4                          10299     1411
## 2023_7_17_H1                          14924     3362
## 2023_7_18_H3                           3232     1419
## 2023_7_18_H7                           7201      873
## 2023_7_29_H5                           6143     4478
## 2023_7_29_H7                          14458    13571
## 2023_7_30_H8                           8449     8443
## 2023_7_30_H9                           2728     1957
## 2023_7_5_H1                            5314     5250
## 2023_7_5_H2                            2501     1793
## 2023_7_6_H6                            8076     2387
## 2023_7_6_H8                           12899     4945
## 2023_7_6_H9                           15133     4645
## 2023_7_8_H3                            5136     2405
## 2023_7_8_H5                           11004     1748
## 2023_7_8_H7                           16530     5152
## 2023_8_4_H2                           13926    13845
## 2023_8_4_H5                            6645     4455
## 2023_8_4_H6                           10165    10109
## 2023_8_4_H7                           10218    10168
## 2023_8_4_H8                           10096     7856
## 2023_8_4_H9                             148       69
## Ba001                                 10735     2683
## Ba002                                  5874     5843
## Ba003                                  7290     7290
## Bb001                                 12686    12686
## Bb002                                 17920    17920
## Bb003                                 20886    20886
## Bb004                                 16668    16568
## Bb005                                  9485     9485
## Bb007                                  9314     9314
## Bb008                                  5416     5416
## Bb009                                  6545     6545
## Bb010                                  8729     2199
## Bb011                                  4980     3914
## Bb012                                 20156    20109
## Bb013                                  5121     5097
## Bb014                                 12842    12842
## Bb015                                  5575     5575
## Bb016                                  5204     5204
## Bb017                                  5388     5379
## Bb018                                  8472     4683
## Bb019                                 16352    16131
## Bb020                                 19859    19846
## Bb021                                  5627     5131
## Bb022                                 14011    14011
## Bb023                                  5929     5929
## Bb024                                 14775    14725
## Bb025                                  8429      475
## Bf001                                 16047    16028
## Bf002                                     2        2
## Bf003                                  8362     8362
## Bf004                                  3827     3827
## Bg001                                  6409      312
## Bg002                                  6296     3172
## Bg003                                  7275     2451
## Bg004                                  7541     7480
## Bg005                                  9368     9286
## Bg006                                 13380     2003
## Bg007                                 12149    11883
## Bg008                                  9618     3687
## Bg009                                  3160     3040
## Bg010                                  4258     4077
## Bg011                                  8073     1652
## Bg012                                  6559      462
## Bg013                                 11934    11775
## Bg014                                 12426    12426
## Bg015                                  4976     4976
## Bg016                                  3391     2777
## Bg017                                  4909      437
## Bg018                                  4658     1161
## Bg019                                  7516      620
## Bi001                                  9685      912
## Bi002                                 11735    11612
## Bi003                                 16124    16031
## Bi004                                 16694    16615
## Bi005                                   149      149
## Bi006                                  3862     3862
## Bi007                                  5970     5970
## CKC0001                                7745     7745
## ESE0004                                7488      458
## ext_neg_ctrl_20230909                     3        3
## ext_neg_ctrl_20231007                     0        0
## ext_neg_ctrl_20231008                     0        0
## ext_neg_ctrl_2024220A                     0        0
## ext_neg_ctrl_2024220B                  1733     1093
## ext_neg_ctrl_2024221A                  2215      645
## ext_neg_ctrl_2024221B                   451      170
## ext_neg_ctrl_2024222A                   677      276
## ext_neg_ctrl_2024222B                  5438     2672
## ext_neg_ctrl_2024312A                     0        0
## ext_neg_ctrl_2024312B                     0        0
## ext_neg_ctrl_2024314A                     0        0
## ext_neg_ctrl_2024314B                     0        0
## ext_neg_ctrl_2024319                    175      175
## KLS0007                                5097     5097
## KLS0027                                4897     4897
## KLS0044                                 584      584
## KLS0045                               14281    14281
## KLS0052                                 610      610
## KLS0054                                 978      978
## KLS0055                                1337     1337
## KLS0071                                9895     9895
## KLS0095                               18048    18048
## KLS0096                               10370    10370
## KLS0105                               19096    19096
## KLS0106                               10127    10127
## KLS0119                               10030    10030
## KLS0134                                9671     9671
## KLS0135                                1679     1679
## KLS0136                                7675     7675
## KLS0137                               17127    17127
## KLS0138                               10060    10060
## KLS0139                                8704     8704
## KLS0150                                6269     6269
## KLS0153                                2572     2572
## KLS0155                               13716    13716
## KLS0156                               22161    22161
## KLS0159                               15892    15892
## KLS0163                                6614     6614
## KLS0165                                6442     6442
## KLS0167                                8730     8730
## KLS0168                                9667     9667
## KLS0169                               13848    13848
## KLS0170                                8583     8583
## KLS0200                               14417    14417
## KLS0201                               20647    20647
## KLS0205                                8580     8580
## KLS0209                                2388     2388
## KLS0221                                6793     6793
## KLS0224                               10778    10778
## KLS0225                               13061    13061
## KLS0227                                9319     9319
## KLS0241                                7979     7979
## KLS0244                                9360     9360
## KLS0246                                3233     3233
## KLS0248                               15023    15023
## KLS0253                               10099    10099
## KLS0254                                7769     5585
## KLS0256                               20145     6456
## KLS0259                               13930    13930
## KLS0263                                1330     1330
## KLS0266                                 659      659
## KLS0272                                 572      440
## pcr_rbcL_neg_crtl_20240417             1942     1430
## pcr_rbcL_neg_ctrl_20240409              455      455
## pcr_rbcL_neg_ctrl_20240418A            7333     3917
## pcr_rbcL_neg_ctrl_20240418B              41       41
## pcr_rbcL_neg_ctrl_20240523                0        0
## rbcL_pcr_neg_ctrl_20231021_20231119       3        3
## SCA0009                               14600    14600
## SCA0010                               11500    11500
## SCA0013                               15651    15619
# Preview the first six rows of the per-sample read-tracking table.
head(track, n = 6L)
##              input filtered denoisedF denoisedR merged nonchim nocontam
## 2020_6_16_H1 16617     9241      9210      9190   8965    8279     8177
## 2020_6_16_H5  9459     5929      5806      5833   4858    2607     2483
## 2020_6_16_H6  1296      850       842       831    761     624      624
## 2020_6_17_H2  6917     4559      4480      4452   2982    1457     1401
## 2020_6_17_H4  3027     2027      1988      1991   1830    1552      565
## 2020_6_17_H8 24212    15437     15362     15357  14791   12591    10665
# Coerce the tracking table to a data frame so the dplyr verbs below can refer
# to its columns by name.
track <- as.data.frame(track)

# Attach tidyverse for the summaries below; note that it masks several
# Bioconductor functions (see the conflict report printed on attach).
library("tidyverse")
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::collapse()     masks Biostrings::collapse(), IRanges::collapse()
## ✖ dplyr::combine()      masks Biobase::combine(), BiocGenerics::combine()
## ✖ purrr::compact()      masks XVector::compact()
## ✖ purrr::compose()      masks ShortRead::compose()
## ✖ dplyr::count()        masks matrixStats::count()
## ✖ dplyr::desc()         masks IRanges::desc()
## ✖ tidyr::expand()       masks S4Vectors::expand()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ dplyr::first()        masks GenomicAlignments::first(), S4Vectors::first()
## ✖ dplyr::id()           masks ShortRead::id()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ dplyr::last()         masks GenomicAlignments::last()
## ✖ ggplot2::Position()   masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()       masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()       masks S4Vectors::rename()
## ✖ lubridate::second()   masks GenomicAlignments::second(), S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice()        masks XVector::slice(), IRanges::slice()
## ✖ tibble::view()        masks ShortRead::view()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Proportion (0-1, not a percentage) of input reads lost by the end of the
# pipeline, i.e. from raw input to the decontaminated counts (nocontam).
track %>%
  mutate(loss = (input - nocontam) / input) %>%
  head()
##              input filtered denoisedF denoisedR merged nonchim nocontam
## 2020_6_16_H1 16617     9241      9210      9190   8965    8279     8177
## 2020_6_16_H5  9459     5929      5806      5833   4858    2607     2483
## 2020_6_16_H6  1296      850       842       831    761     624      624
## 2020_6_17_H2  6917     4559      4480      4452   2982    1457     1401
## 2020_6_17_H4  3027     2027      1988      1991   1830    1552      565
## 2020_6_17_H8 24212    15437     15362     15357  14791   12591    10665
##                   loss
## 2020_6_16_H1 0.5079136
## 2020_6_16_H5 0.7374987
## 2020_6_16_H6 0.5185185
## 2020_6_17_H2 0.7974555
## 2020_6_17_H4 0.8133465
## 2020_6_17_H8 0.5595159
# Keep only the negative controls, i.e. rows whose names start with "ext",
# "pcr" or "rbcL", and preview the first six.
track %>%
  filter(str_detect(rownames(.), "^(ext|pcr|rbcL)")) %>%
  head()
##                       input filtered denoisedF denoisedR merged nonchim
## ext_neg_ctrl_20230909     7        3         3         3      3       3
## ext_neg_ctrl_20231007     6        3         1         1      0       0
## ext_neg_ctrl_20231008    12        1         1         1      0       0
## ext_neg_ctrl_2024220A   122       37        27        29     27       0
## ext_neg_ctrl_2024220B  3295     2165      2165      2160   2144    1733
## ext_neg_ctrl_2024221A  4432     2878      2876      2866   2821    2215
##                       nocontam
## ext_neg_ctrl_20230909        3
## ext_neg_ctrl_20231007        0
## ext_neg_ctrl_20231008        0
## ext_neg_ctrl_2024220A        0
## ext_neg_ctrl_2024220B     1093
## ext_neg_ctrl_2024221A      645
# Mean and sd of the read counts at every pipeline step (plus the proportion
# of reads lost), computed separately for negative controls (NegCtrl = 1) and
# all other samples (NegCtrl = 0). The result is rounded to two decimals and
# transposed so that each statistic is printed on its own row.
track %>%
  mutate(loss = (input - nocontam) / input) %>%
  group_by(NegCtrl = str_detect(rownames(.), "^(ext|pcr|rbcL)")) %>%
  summarize(
    across(input:loss, list(mean = mean, sd = sd), .names = "{.col}.{.fn}")
  ) %>%
  round(digits = 2) %>%
  t()
##                    [,1]    [,2]
## NegCtrl            0.00    1.00
## input.mean     16831.54 1909.25
## input.sd        9406.71 3650.45
## filtered.mean  10900.73 1244.35
## filtered.sd     6188.45 2388.04
## denoisedF.mean 10826.32 1241.30
## denoisedF.sd    6163.43 2383.87
## denoisedR.mean 10809.44 1237.70
## denoisedR.sd    6158.79 2378.23
## merged.mean    10215.85 1199.10
## merged.sd       5968.33 2274.75
## nonchim.mean    8306.65 1023.30
## nonchim.sd      5133.02 1986.58
## nocontam.mean   6864.17  544.00
## nocontam.sd     5099.99 1040.34
## loss.mean          0.61    0.76
## loss.sd            0.18    0.25
# Detach tidyverse so its functions stop masking the Bioconductor ones listed in
# the conflict report (it is re-attached later for plotting, after taxonomic
# assignment). Detaching "package:tidyverse" alone removes only the meta-package
# and leaves the core packages it attached (dplyr, purrr, tidyr, ...) on the
# search path, so every attached tidyverse entry is detached here. Entries that
# are not currently attached are skipped, which keeps this chunk safe to re-run.
invisible(lapply(
  intersect(
    paste0(
      "package:",
      c("tidyverse", "lubridate", "forcats", "stringr", "dplyr",
        "purrr", "readr", "tidyr", "tibble", "ggplot2")
    ),
    search()
  ),
  detach,
  character.only = TRUE
))

Assign taxonomy

The DADA2 package provides a native implementation of the naive Bayesian classifier method for taxonomic assignment. The assignTaxonomy function takes as input a set of sequences to be classified and a training set of reference sequences with known taxonomy, and outputs taxonomic assignments with at least minBoot bootstrap confidence.

RBCL Database: Bell, Karen (2021). rbcL July 2021. figshare. Dataset. https://doi.org/10.6084/m9.figshare.14936007.v1

“We downloaded all available seed plant rbcL sequences (as of 27 January 2016) from NCBI, using the following search: (rbcL[Gene Name] AND 50:400000000[Sequence Length]) AND "seed plants"[porgn:_txid58024]. This included sequences that were predominantly rbcL, sequences with a small fragment of rbcL sequence along with a longer sequence of intergenic spacer, and complete ptDNA genomes.” (Bell et al. 2017)

Load reference databases

# Paths to the dada2-formatted rbcL reference FASTA files.
# Taxonomy training set (this is the file handed to assignTaxonomy below):
rbcL.ref.tax <- "/scratch/kls7sg/Bioinformatics/ReferenceDatabases/rbcL-KarenBell_2021.07.08/rbcL_plus_Nov2019adds_Jul2021corrections.dada2.fa"

# Species-level FASTA from the same release:
rbcL.ref.spp <- "/scratch/kls7sg/Bioinformatics/ReferenceDatabases/rbcL-KarenBell_2021.07.08/rbcL_plus_Nov2019adds_Jul2021corrections.dada2.species.fa"

Implement assignTaxonomy function

# start 3:08 pm, finished like 4:41pm (locally)
# Print the current time, then store the start time for the run-time report.
print(Sys.time())
t1 <- Sys.time()
## [1] "2024-11-06 17:44:56 EST"
# Classify every ASV in the decontaminated sequence table against the rbcL
# taxonomy reference.
taxa.rbcl <- assignTaxonomy(
  seqs = getSequences(seqtab.nochim.nocontam),
  refFasta = rbcL.ref.tax,
  multithread = TRUE
)
## Warning in .Call2("fasta_index", filexp_list, nrec, skip, seek.first.rec, :
## reading FASTA file
## /scratch/kls7sg/Bioinformatics/ReferenceDatabases/rbcL-KarenBell_2021.07.08/rbcL_plus_Nov2019adds_Jul2021corrections.dada2.fa:
## ignored 24379 invalid one-letter sequence codes
# Print the current time, then store the end time for the run-time report.
print(Sys.time())
t2 <- Sys.time()
## [1] "2024-11-06 17:45:37 EST"
# If your reference file is not in the format assignTaxonomy expects, see
# https://benjjneb.github.io/dada2/training.html

# Elapsed wall-clock time of the taxonomy assignment.
difftime(t2, t1)
## Time difference of 41.67148 secs
#Time difference of 36.02247 secs (on Rivanna with 24 cores and multithread=TRUE)

#Warning message:
#In .Call2("fasta_index", filexp_list, nrec, skip, seek.first.rec,  :
#  reading FASTA file /scratch/kls7sg/Bioinformatics/rbcL-KarenBell-2021.07.08/rbcL_plus_Nov2019adds_Jul2021corrections.dada2.fa: ignored 24379 invalid one-letter sequence codes
  #This message indicates that you have non-ACGT characters in your custom taxonomy reference file. Is that file appropriately formatted? It's possible this could interfere with your results if the non-ACGT characters are needed to represent important taxa.

View the taxonomic assignment of all ASV sequences

# Work on a copy whose row names are dropped, for display only; taxa.rbcl
# itself keeps its row names.
taxa.rbcl.print <- taxa.rbcl
rownames(taxa.rbcl.print) <- NULL
head(taxa.rbcl.print, n = 6L)
##      Kingdom                  Phylum                  Class                   
## [1,] "k__Viridiplantae_33090" "p__Streptophyta_35493" "c__sub__rosids_71275"  
## [2,] "k__Viridiplantae_33090" "p__Streptophyta_35493" "c__sub__rosids_71275"  
## [3,] "k__Viridiplantae_33090" "p__Streptophyta_35493" "c__sub__asterids_71274"
## [4,] "k__Viridiplantae_33090" "p__Streptophyta_35493" "c__sub__rosids_71275"  
## [5,] "k__Viridiplantae_33090" "p__Streptophyta_35493" "c__sub__asterids_71274"
## [6,] "k__Viridiplantae_33090" "p__Streptophyta_35493" "c__sub__asterids_71274"
##      Order                  Family                     Genus              
## [1,] "o__Fabales_72025"     "f__Fabaceae_3803"         "g__Trifolium_3898"
## [2,] "o__Fabales_72025"     "f__Fabaceae_3803"         "g__Cercis_49800"  
## [3,] "o__Lamiales_4143"     "f__Plantaginaceae_156152" "g__Plantago_26867"
## [4,] "o__Malpighiales_3646" "f__Salicaceae_3688"       "g__Salix_40685"   
## [5,] "o__Lamiales_4143"     "f__Lamiaceae_4136"        "g__Lamium_53158"  
## [6,] "o__Dipsacales_4199"   "f__Caprifoliaceae_4200"   "g__Lonicera_49606"
##      Species                       
## [1,] "s__Trifolium repens_3899"    
## [2,] "s__Cercis siliquastrum_49802"
## [3,] "s__Plantago lanceolata_39414"
## [4,] NA                            
## [5,] "s__Lamium purpureum_53164"   
## [6,] "s__Lonicera maackii_51255"

Linking taxonomic assignment of ASVs to sample sequence table

# Assigned taxonomy, one row per ASV sequence.
rbcl.taxa <- as.data.frame(taxa.rbcl)
# Sample sequence table, transposed so that each ASV sequence is a row (and
# each sample a column).
rbcl.seq <- as.data.frame(t(seqtab.nochim.nocontam))

# The two tables are bound side by side further down, so their rows must list
# the same sequences in the same order; this prints TRUE when they do.
identical(x = rownames(rbcl.seq), y = rownames(rbcl.taxa))
## [1] TRUE
# For every row name of rbcl.seq, give its position among the row names of
# rbcl.taxa; when both tables are in the same order this prints 1, 2, 3, ...
# up to the number of ASVs.
match(x = rownames(rbcl.seq), table = rownames(rbcl.taxa))
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
# cbind() pairs rows by position, not by row name, so stop here if the ASV
# (row) order of the two tables ever differs, instead of silently attaching
# the wrong taxonomy to each sequence.
stopifnot(identical(rownames(rbcl.seq), rownames(rbcl.taxa)))
# bind sample sequence table with assigned sequence taxonomy
rbcL.IDs <- cbind(rbcl.seq, rbcl.taxa)
# rownames(rbcL.IDs) <- NULL #remove ASV row names (skipping this for now)

# rename samples (remove "rbcL_"); first list the current column names
colnames(rbcL.IDs)
##   [1] "rbcL_2020_6_16_H1"                       
##   [2] "rbcL_2020_6_16_H5"                       
##   [3] "rbcL_2020_6_16_H6"                       
##   [4] "rbcL_2020_6_17_H2"                       
##   [5] "rbcL_2020_6_17_H4"                       
##   [6] "rbcL_2020_6_17_H8"                       
##   [7] "rbcL_2020_6_18_H3"                       
##   [8] "rbcL_2020_6_18_H7"                       
##   [9] "rbcL_2020_6_18_H9"                       
##  [10] "rbcL_2020_6_3_H1"                        
##  [11] "rbcL_2020_6_3_H5"                        
##  [12] "rbcL_2020_6_3_H6"                        
##  [13] "rbcL_2020_6_30_H1"                       
##  [14] "rbcL_2020_6_30_H5"                       
##  [15] "rbcL_2020_6_30_H6"                       
##  [16] "rbcL_2020_6_4_H2"                        
##  [17] "rbcL_2020_6_4_H4"                        
##  [18] "rbcL_2020_6_4_H8"                        
##  [19] "rbcL_2020_6_5_H3"                        
##  [20] "rbcL_2020_6_5_H7"                        
##  [21] "rbcL_2020_6_5_H9"                        
##  [22] "rbcL_2020_7_1_H2"                        
##  [23] "rbcL_2020_7_1_H4"                        
##  [24] "rbcL_2020_7_1_H8"                        
##  [25] "rbcL_2020_7_14_H5"                       
##  [26] "rbcL_2020_7_14_H6"                       
##  [27] "rbcL_2020_7_15_H8"                       
##  [28] "rbcL_2020_7_16_H3"                       
##  [29] "rbcL_2020_7_16_H7"                       
##  [30] "rbcL_2020_7_16_H9"                       
##  [31] "rbcL_2020_7_2_H3"                        
##  [32] "rbcL_2020_7_2_H7"                        
##  [33] "rbcL_2020_7_2_H9"                        
##  [34] "rbcL_2021_6_13_H1"                       
##  [35] "rbcL_2021_6_13_H3"                       
##  [36] "rbcL_2021_6_14_H11"                      
##  [37] "rbcL_2021_6_14_H6"                       
##  [38] "rbcL_2021_6_14_H7"                       
##  [39] "rbcL_2021_6_15_H8"                       
##  [40] "rbcL_2021_6_21_H10"                      
##  [41] "rbcL_2021_6_21_H12"                      
##  [42] "rbcL_2021_6_21_H9"                       
##  [43] "rbcL_2021_6_27_H21"                      
##  [44] "rbcL_2021_6_27_H22"                      
##  [45] "rbcL_2021_6_27_H27"                      
##  [46] "rbcL_2021_6_28_H25"                      
##  [47] "rbcL_2021_6_28_H26"                      
##  [48] "rbcL_2021_6_28_H28"                      
##  [49] "rbcL_2021_6_29_H17"                      
##  [50] "rbcL_2021_6_29_H23"                      
##  [51] "rbcL_2021_6_29_H24"                      
##  [52] "rbcL_2021_6_4_H21"                       
##  [53] "rbcL_2021_6_4_H22"                       
##  [54] "rbcL_2021_6_4_H27"                       
##  [55] "rbcL_2021_6_5_H18"                       
##  [56] "rbcL_2021_6_5_H25"                       
##  [57] "rbcL_2021_6_5_H26"                       
##  [58] "rbcL_2021_6_6_H17"                       
##  [59] "rbcL_2021_6_6_H24"                       
##  [60] "rbcL_2021_6_7_H23"                       
##  [61] "rbcL_2021_7_14_H10"                      
##  [62] "rbcL_2021_7_14_H12"                      
##  [63] "rbcL_2021_7_20_H27"                      
##  [64] "rbcL_2021_7_21_H25"                      
##  [65] "rbcL_2021_7_21_H26"                      
##  [66] "rbcL_2021_7_6_H11"                       
##  [67] "rbcL_2021_7_6_H6"                        
##  [68] "rbcL_2021_7_7_H8"                        
##  [69] "rbcL_2021_7_8_H3"                        
##  [70] "rbcL_2023_6_12_H3"                       
##  [71] "rbcL_2023_6_12_H5"                       
##  [72] "rbcL_2023_6_12_H7"                       
##  [73] "rbcL_2023_6_13_H6"                       
##  [74] "rbcL_2023_6_13_H8"                       
##  [75] "rbcL_2023_6_13_H9"                       
##  [76] "rbcL_2023_6_14_H3"                       
##  [77] "rbcL_2023_6_14_H7"                       
##  [78] "rbcL_2023_6_14_H9"                       
##  [79] "rbcL_2023_6_16_H5"                       
##  [80] "rbcL_2023_6_24_H6"                       
##  [81] "rbcL_2023_6_24_H8"                       
##  [82] "rbcL_2023_6_25_H2"                       
##  [83] "rbcL_2023_6_25_H4"                       
##  [84] "rbcL_2023_6_26_H1"                       
##  [85] "rbcL_2023_6_26_H7"                       
##  [86] "rbcL_2023_6_27_H3"                       
##  [87] "rbcL_2023_6_27_H5"                       
##  [88] "rbcL_2023_6_8_H1"                        
##  [89] "rbcL_2023_6_8_H2"                        
##  [90] "rbcL_2023_6_8_H4"                        
##  [91] "rbcL_2023_6_9_H2"                        
##  [92] "rbcL_2023_6_9_H4"                        
##  [93] "rbcL_2023_7_15_H6"                       
##  [94] "rbcL_2023_7_16_H4"                       
##  [95] "rbcL_2023_7_17_H1"                       
##  [96] "rbcL_2023_7_18_H3"                       
##  [97] "rbcL_2023_7_18_H7"                       
##  [98] "rbcL_2023_7_29_H5"                       
##  [99] "rbcL_2023_7_29_H7"                       
## [100] "rbcL_2023_7_30_H8"                       
## [101] "rbcL_2023_7_30_H9"                       
## [102] "rbcL_2023_7_5_H1"                        
## [103] "rbcL_2023_7_5_H2"                        
## [104] "rbcL_2023_7_6_H6"                        
## [105] "rbcL_2023_7_6_H8"                        
## [106] "rbcL_2023_7_6_H9"                        
## [107] "rbcL_2023_7_8_H3"                        
## [108] "rbcL_2023_7_8_H5"                        
## [109] "rbcL_2023_7_8_H7"                        
## [110] "rbcL_2023_8_4_H2"                        
## [111] "rbcL_2023_8_4_H5"                        
## [112] "rbcL_2023_8_4_H6"                        
## [113] "rbcL_2023_8_4_H7"                        
## [114] "rbcL_2023_8_4_H8"                        
## [115] "rbcL_2023_8_4_H9"                        
## [116] "rbcL_Ba001"                              
## [117] "rbcL_Ba002"                              
## [118] "rbcL_Ba003"                              
## [119] "rbcL_Bb001"                              
## [120] "rbcL_Bb002"                              
## [121] "rbcL_Bb003"                              
## [122] "rbcL_Bb004"                              
## [123] "rbcL_Bb005"                              
## [124] "rbcL_Bb007"                              
## [125] "rbcL_Bb008"                              
## [126] "rbcL_Bb009"                              
## [127] "rbcL_Bb010"                              
## [128] "rbcL_Bb011"                              
## [129] "rbcL_Bb012"                              
## [130] "rbcL_Bb013"                              
## [131] "rbcL_Bb014"                              
## [132] "rbcL_Bb015"                              
## [133] "rbcL_Bb016"                              
## [134] "rbcL_Bb017"                              
## [135] "rbcL_Bb018"                              
## [136] "rbcL_Bb019"                              
## [137] "rbcL_Bb020"                              
## [138] "rbcL_Bb021"                              
## [139] "rbcL_Bb022"                              
## [140] "rbcL_Bb023"                              
## [141] "rbcL_Bb024"                              
## [142] "rbcL_Bb025"                              
## [143] "rbcL_Bf001"                              
## [144] "rbcL_Bf002"                              
## [145] "rbcL_Bf003"                              
## [146] "rbcL_Bf004"                              
## [147] "rbcL_Bg001"                              
## [148] "rbcL_Bg002"                              
## [149] "rbcL_Bg003"                              
## [150] "rbcL_Bg004"                              
## [151] "rbcL_Bg005"                              
## [152] "rbcL_Bg006"                              
## [153] "rbcL_Bg007"                              
## [154] "rbcL_Bg008"                              
## [155] "rbcL_Bg009"                              
## [156] "rbcL_Bg010"                              
## [157] "rbcL_Bg011"                              
## [158] "rbcL_Bg012"                              
## [159] "rbcL_Bg013"                              
## [160] "rbcL_Bg014"                              
## [161] "rbcL_Bg015"                              
## [162] "rbcL_Bg016"                              
## [163] "rbcL_Bg017"                              
## [164] "rbcL_Bg018"                              
## [165] "rbcL_Bg019"                              
## [166] "rbcL_Bi001"                              
## [167] "rbcL_Bi002"                              
## [168] "rbcL_Bi003"                              
## [169] "rbcL_Bi004"                              
## [170] "rbcL_Bi005"                              
## [171] "rbcL_Bi006"                              
## [172] "rbcL_Bi007"                              
## [173] "rbcL_CKC0001"                            
## [174] "rbcL_ESE0004"                            
## [175] "rbcL_ext_neg_ctrl_20230909"              
## [176] "rbcL_ext_neg_ctrl_20231007"              
## [177] "rbcL_ext_neg_ctrl_20231008"              
## [178] "rbcL_ext_neg_ctrl_2024220A"              
## [179] "rbcL_ext_neg_ctrl_2024220B"              
## [180] "rbcL_ext_neg_ctrl_2024221A"              
## [181] "rbcL_ext_neg_ctrl_2024221B"              
## [182] "rbcL_ext_neg_ctrl_2024222A"              
## [183] "rbcL_ext_neg_ctrl_2024222B"              
## [184] "rbcL_ext_neg_ctrl_2024312A"              
## [185] "rbcL_ext_neg_ctrl_2024312B"              
## [186] "rbcL_ext_neg_ctrl_2024314A"              
## [187] "rbcL_ext_neg_ctrl_2024314B"              
## [188] "rbcL_ext_neg_ctrl_2024319"               
## [189] "rbcL_KLS0007"                            
## [190] "rbcL_KLS0027"                            
## [191] "rbcL_KLS0044"                            
## [192] "rbcL_KLS0045"                            
## [193] "rbcL_KLS0052"                            
## [194] "rbcL_KLS0054"                            
## [195] "rbcL_KLS0055"                            
## [196] "rbcL_KLS0071"                            
## [197] "rbcL_KLS0095"                            
## [198] "rbcL_KLS0096"                            
## [199] "rbcL_KLS0105"                            
## [200] "rbcL_KLS0106"                            
## [201] "rbcL_KLS0119"                            
## [202] "rbcL_KLS0134"                            
## [203] "rbcL_KLS0135"                            
## [204] "rbcL_KLS0136"                            
## [205] "rbcL_KLS0137"                            
## [206] "rbcL_KLS0138"                            
## [207] "rbcL_KLS0139"                            
## [208] "rbcL_KLS0150"                            
## [209] "rbcL_KLS0153"                            
## [210] "rbcL_KLS0155"                            
## [211] "rbcL_KLS0156"                            
## [212] "rbcL_KLS0159"                            
## [213] "rbcL_KLS0163"                            
## [214] "rbcL_KLS0165"                            
## [215] "rbcL_KLS0167"                            
## [216] "rbcL_KLS0168"                            
## [217] "rbcL_KLS0169"                            
## [218] "rbcL_KLS0170"                            
## [219] "rbcL_KLS0200"                            
## [220] "rbcL_KLS0201"                            
## [221] "rbcL_KLS0205"                            
## [222] "rbcL_KLS0209"                            
## [223] "rbcL_KLS0221"                            
## [224] "rbcL_KLS0224"                            
## [225] "rbcL_KLS0225"                            
## [226] "rbcL_KLS0227"                            
## [227] "rbcL_KLS0241"                            
## [228] "rbcL_KLS0244"                            
## [229] "rbcL_KLS0246"                            
## [230] "rbcL_KLS0248"                            
## [231] "rbcL_KLS0253"                            
## [232] "rbcL_KLS0254"                            
## [233] "rbcL_KLS0256"                            
## [234] "rbcL_KLS0259"                            
## [235] "rbcL_KLS0263"                            
## [236] "rbcL_KLS0266"                            
## [237] "rbcL_KLS0272"                            
## [238] "rbcL_pcr_rbcL_neg_crtl_20240417"         
## [239] "rbcL_pcr_rbcL_neg_ctrl_20240409"         
## [240] "rbcL_pcr_rbcL_neg_ctrl_20240418A"        
## [241] "rbcL_pcr_rbcL_neg_ctrl_20240418B"        
## [242] "rbcL_pcr_rbcL_neg_ctrl_20240523"         
## [243] "rbcL_rbcL_pcr_neg_ctrl_20231021_20231119"
## [244] "rbcL_SCA0009"                            
## [245] "rbcL_SCA0010"                            
## [246] "rbcL_SCA0013"                            
## [247] "Kingdom"                                 
## [248] "Phylum"                                  
## [249] "Class"                                   
## [250] "Order"                                   
## [251] "Family"                                  
## [252] "Genus"                                   
## [253] "Species"
# Remove the "rbcL_" marker prefix from the start of every column name, then
# print the cleaned names as a check. Only a leading occurrence is removed, so a
# name such as "rbcL_rbcL_pcr_..." keeps its second "rbcL_".
colnames(rbcL.IDs) <- sub(pattern = "^rbcL_", replacement = "", x = colnames(rbcL.IDs))
colnames(rbcL.IDs)
##   [1] "2020_6_16_H1"                       
##   [2] "2020_6_16_H5"                       
##   [3] "2020_6_16_H6"                       
##   [4] "2020_6_17_H2"                       
##   [5] "2020_6_17_H4"                       
##   [6] "2020_6_17_H8"                       
##   [7] "2020_6_18_H3"                       
##   [8] "2020_6_18_H7"                       
##   [9] "2020_6_18_H9"                       
##  [10] "2020_6_3_H1"                        
##  [11] "2020_6_3_H5"                        
##  [12] "2020_6_3_H6"                        
##  [13] "2020_6_30_H1"                       
##  [14] "2020_6_30_H5"                       
##  [15] "2020_6_30_H6"                       
##  [16] "2020_6_4_H2"                        
##  [17] "2020_6_4_H4"                        
##  [18] "2020_6_4_H8"                        
##  [19] "2020_6_5_H3"                        
##  [20] "2020_6_5_H7"                        
##  [21] "2020_6_5_H9"                        
##  [22] "2020_7_1_H2"                        
##  [23] "2020_7_1_H4"                        
##  [24] "2020_7_1_H8"                        
##  [25] "2020_7_14_H5"                       
##  [26] "2020_7_14_H6"                       
##  [27] "2020_7_15_H8"                       
##  [28] "2020_7_16_H3"                       
##  [29] "2020_7_16_H7"                       
##  [30] "2020_7_16_H9"                       
##  [31] "2020_7_2_H3"                        
##  [32] "2020_7_2_H7"                        
##  [33] "2020_7_2_H9"                        
##  [34] "2021_6_13_H1"                       
##  [35] "2021_6_13_H3"                       
##  [36] "2021_6_14_H11"                      
##  [37] "2021_6_14_H6"                       
##  [38] "2021_6_14_H7"                       
##  [39] "2021_6_15_H8"                       
##  [40] "2021_6_21_H10"                      
##  [41] "2021_6_21_H12"                      
##  [42] "2021_6_21_H9"                       
##  [43] "2021_6_27_H21"                      
##  [44] "2021_6_27_H22"                      
##  [45] "2021_6_27_H27"                      
##  [46] "2021_6_28_H25"                      
##  [47] "2021_6_28_H26"                      
##  [48] "2021_6_28_H28"                      
##  [49] "2021_6_29_H17"                      
##  [50] "2021_6_29_H23"                      
##  [51] "2021_6_29_H24"                      
##  [52] "2021_6_4_H21"                       
##  [53] "2021_6_4_H22"                       
##  [54] "2021_6_4_H27"                       
##  [55] "2021_6_5_H18"                       
##  [56] "2021_6_5_H25"                       
##  [57] "2021_6_5_H26"                       
##  [58] "2021_6_6_H17"                       
##  [59] "2021_6_6_H24"                       
##  [60] "2021_6_7_H23"                       
##  [61] "2021_7_14_H10"                      
##  [62] "2021_7_14_H12"                      
##  [63] "2021_7_20_H27"                      
##  [64] "2021_7_21_H25"                      
##  [65] "2021_7_21_H26"                      
##  [66] "2021_7_6_H11"                       
##  [67] "2021_7_6_H6"                        
##  [68] "2021_7_7_H8"                        
##  [69] "2021_7_8_H3"                        
##  [70] "2023_6_12_H3"                       
##  [71] "2023_6_12_H5"                       
##  [72] "2023_6_12_H7"                       
##  [73] "2023_6_13_H6"                       
##  [74] "2023_6_13_H8"                       
##  [75] "2023_6_13_H9"                       
##  [76] "2023_6_14_H3"                       
##  [77] "2023_6_14_H7"                       
##  [78] "2023_6_14_H9"                       
##  [79] "2023_6_16_H5"                       
##  [80] "2023_6_24_H6"                       
##  [81] "2023_6_24_H8"                       
##  [82] "2023_6_25_H2"                       
##  [83] "2023_6_25_H4"                       
##  [84] "2023_6_26_H1"                       
##  [85] "2023_6_26_H7"                       
##  [86] "2023_6_27_H3"                       
##  [87] "2023_6_27_H5"                       
##  [88] "2023_6_8_H1"                        
##  [89] "2023_6_8_H2"                        
##  [90] "2023_6_8_H4"                        
##  [91] "2023_6_9_H2"                        
##  [92] "2023_6_9_H4"                        
##  [93] "2023_7_15_H6"                       
##  [94] "2023_7_16_H4"                       
##  [95] "2023_7_17_H1"                       
##  [96] "2023_7_18_H3"                       
##  [97] "2023_7_18_H7"                       
##  [98] "2023_7_29_H5"                       
##  [99] "2023_7_29_H7"                       
## [100] "2023_7_30_H8"                       
## [101] "2023_7_30_H9"                       
## [102] "2023_7_5_H1"                        
## [103] "2023_7_5_H2"                        
## [104] "2023_7_6_H6"                        
## [105] "2023_7_6_H8"                        
## [106] "2023_7_6_H9"                        
## [107] "2023_7_8_H3"                        
## [108] "2023_7_8_H5"                        
## [109] "2023_7_8_H7"                        
## [110] "2023_8_4_H2"                        
## [111] "2023_8_4_H5"                        
## [112] "2023_8_4_H6"                        
## [113] "2023_8_4_H7"                        
## [114] "2023_8_4_H8"                        
## [115] "2023_8_4_H9"                        
## [116] "Ba001"                              
## [117] "Ba002"                              
## [118] "Ba003"                              
## [119] "Bb001"                              
## [120] "Bb002"                              
## [121] "Bb003"                              
## [122] "Bb004"                              
## [123] "Bb005"                              
## [124] "Bb007"                              
## [125] "Bb008"                              
## [126] "Bb009"                              
## [127] "Bb010"                              
## [128] "Bb011"                              
## [129] "Bb012"                              
## [130] "Bb013"                              
## [131] "Bb014"                              
## [132] "Bb015"                              
## [133] "Bb016"                              
## [134] "Bb017"                              
## [135] "Bb018"                              
## [136] "Bb019"                              
## [137] "Bb020"                              
## [138] "Bb021"                              
## [139] "Bb022"                              
## [140] "Bb023"                              
## [141] "Bb024"                              
## [142] "Bb025"                              
## [143] "Bf001"                              
## [144] "Bf002"                              
## [145] "Bf003"                              
## [146] "Bf004"                              
## [147] "Bg001"                              
## [148] "Bg002"                              
## [149] "Bg003"                              
## [150] "Bg004"                              
## [151] "Bg005"                              
## [152] "Bg006"                              
## [153] "Bg007"                              
## [154] "Bg008"                              
## [155] "Bg009"                              
## [156] "Bg010"                              
## [157] "Bg011"                              
## [158] "Bg012"                              
## [159] "Bg013"                              
## [160] "Bg014"                              
## [161] "Bg015"                              
## [162] "Bg016"                              
## [163] "Bg017"                              
## [164] "Bg018"                              
## [165] "Bg019"                              
## [166] "Bi001"                              
## [167] "Bi002"                              
## [168] "Bi003"                              
## [169] "Bi004"                              
## [170] "Bi005"                              
## [171] "Bi006"                              
## [172] "Bi007"                              
## [173] "CKC0001"                            
## [174] "ESE0004"                            
## [175] "ext_neg_ctrl_20230909"              
## [176] "ext_neg_ctrl_20231007"              
## [177] "ext_neg_ctrl_20231008"              
## [178] "ext_neg_ctrl_2024220A"              
## [179] "ext_neg_ctrl_2024220B"              
## [180] "ext_neg_ctrl_2024221A"              
## [181] "ext_neg_ctrl_2024221B"              
## [182] "ext_neg_ctrl_2024222A"              
## [183] "ext_neg_ctrl_2024222B"              
## [184] "ext_neg_ctrl_2024312A"              
## [185] "ext_neg_ctrl_2024312B"              
## [186] "ext_neg_ctrl_2024314A"              
## [187] "ext_neg_ctrl_2024314B"              
## [188] "ext_neg_ctrl_2024319"               
## [189] "KLS0007"                            
## [190] "KLS0027"                            
## [191] "KLS0044"                            
## [192] "KLS0045"                            
## [193] "KLS0052"                            
## [194] "KLS0054"                            
## [195] "KLS0055"                            
## [196] "KLS0071"                            
## [197] "KLS0095"                            
## [198] "KLS0096"                            
## [199] "KLS0105"                            
## [200] "KLS0106"                            
## [201] "KLS0119"                            
## [202] "KLS0134"                            
## [203] "KLS0135"                            
## [204] "KLS0136"                            
## [205] "KLS0137"                            
## [206] "KLS0138"                            
## [207] "KLS0139"                            
## [208] "KLS0150"                            
## [209] "KLS0153"                            
## [210] "KLS0155"                            
## [211] "KLS0156"                            
## [212] "KLS0159"                            
## [213] "KLS0163"                            
## [214] "KLS0165"                            
## [215] "KLS0167"                            
## [216] "KLS0168"                            
## [217] "KLS0169"                            
## [218] "KLS0170"                            
## [219] "KLS0200"                            
## [220] "KLS0201"                            
## [221] "KLS0205"                            
## [222] "KLS0209"                            
## [223] "KLS0221"                            
## [224] "KLS0224"                            
## [225] "KLS0225"                            
## [226] "KLS0227"                            
## [227] "KLS0241"                            
## [228] "KLS0244"                            
## [229] "KLS0246"                            
## [230] "KLS0248"                            
## [231] "KLS0253"                            
## [232] "KLS0254"                            
## [233] "KLS0256"                            
## [234] "KLS0259"                            
## [235] "KLS0263"                            
## [236] "KLS0266"                            
## [237] "KLS0272"                            
## [238] "pcr_rbcL_neg_crtl_20240417"         
## [239] "pcr_rbcL_neg_ctrl_20240409"         
## [240] "pcr_rbcL_neg_ctrl_20240418A"        
## [241] "pcr_rbcL_neg_ctrl_20240418B"        
## [242] "pcr_rbcL_neg_ctrl_20240523"         
## [243] "rbcL_pcr_neg_ctrl_20231021_20231119"
## [244] "SCA0009"                            
## [245] "SCA0010"                            
## [246] "SCA0013"                            
## [247] "Kingdom"                            
## [248] "Phylum"                             
## [249] "Class"                              
## [250] "Order"                              
## [251] "Family"                             
## [252] "Genus"                              
## [253] "Species"

Plots and summaries of taxonomic assignments

library(tidyverse)
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::collapse()     masks Biostrings::collapse(), IRanges::collapse()
## ✖ dplyr::combine()      masks Biobase::combine(), BiocGenerics::combine()
## ✖ purrr::compact()      masks XVector::compact()
## ✖ purrr::compose()      masks ShortRead::compose()
## ✖ dplyr::count()        masks matrixStats::count()
## ✖ dplyr::desc()         masks IRanges::desc()
## ✖ tidyr::expand()       masks S4Vectors::expand()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ dplyr::first()        masks GenomicAlignments::first(), S4Vectors::first()
## ✖ dplyr::id()           masks ShortRead::id()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ dplyr::last()         masks GenomicAlignments::last()
## ✖ ggplot2::Position()   masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()       masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()       masks S4Vectors::rename()
## ✖ lubridate::second()   masks GenomicAlignments::second(), S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice()        masks XVector::slice(), IRanges::slice()
## ✖ tibble::view()        masks ShortRead::view()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Create long data from rbcL.IDs (sample reads by ASV, with taxa assigned)

Transpose rbcL.IDs (ASV table by sample) into long format after calculating total reads for each ASV (each row in rbcL.IDs). After transposing, calculate total reads for each Sample

# Total reads per ASV (one ASV per row). The sample columns are the numeric
# columns of rbcL.IDs, so they are selected by type rather than by the names of
# the first and last sample; this keeps the chunk working when samples are
# added, dropped or reordered. any_of() excludes an ASVTotalReads column left
# over from a previous run so that re-running the chunk does not double-count.
rbcL.IDs <- rbcL.IDs %>%
  mutate(ASVTotalReads = rowSums(across(c(where(is.numeric), -any_of("ASVTotalReads"))))) # total ASV reads

# Long format: one row per ASV x sample combination with at least one read,
# plus the total number of reads in that sample.
# NOTE: the result is left grouped by Sample.
rbcL.IDs.long <- rbcL.IDs %>%
  pivot_longer(cols = c(where(is.numeric), -ASVTotalReads), names_to = "Sample", values_to = "Reads") %>%
  filter(Reads > 0) %>%
  group_by(Sample) %>%
  mutate(SampleTotalReads = sum(Reads)) # total Sample reads

#rbcL.IDs<-partial_join(rbcL.IDs,___sample-info-data___,"Sample", "SampleName") #join sample info if you want/have it

Total reads? Total Samples? Reads per sample?

Reads: 1,928,317 across all samples before filtering; 1,898,063 after removing low-abundance ASVs; 1,562,182 after also removing contaminant sequences. Samples: 237 with 1+ reads before filtering; 232 after removing low-abundance ASVs. Reads per sample: 1 to 28,062 (mean = 8.1K) before filtering; 2 to 22,161 (mean = 6.7K) in the final filtered data summarised below.

# Total reads across all ASVs.
# History: 1,928,317 reads across all samples (before removing ASVs with less than 100 total reads); 1,898,063 reads after removing low-abundance ASVs; 1,562,182 reads after also removing contaminant sequences
sum(rbcL.IDs$ASVTotalReads)
## [1] 1562182
# Number of sample columns in the ASV table: 246 samples (some of them have 0 reads; they are listed further down)
rbcL.IDs %>% select(where(is.numeric), -ASVTotalReads) %>% ncol()
## [1] 246
# Samples with at least one read: 237 samples (before removing ASVs with less than 100 total reads); 232 samples after removing low-abundance ASVs
length(unique(rbcL.IDs.long$Sample))
## [1] 232
# Sum the reads of every sample column in rbcL.IDs (basically an ASV table by sample with taxonomic ids); sample names become the row names
sample_totals <- data.frame(TotalReads = colSums(select(rbcL.IDs, where(is.numeric), -ASVTotalReads)))
# Show only the samples with 0 reads (these samples get dropped from the data when this dataset is transformed long into rbcL.IDs.long)
sample_totals %>% filter(TotalReads == 0)
##                            TotalReads
## 2020_7_1_H4                         0
## 2020_7_14_H5                        0
## 2020_7_14_H6                        0
## 2020_7_16_H9                        0
## 2021_7_14_H12                       0
## 2021_7_21_H26                       0
## ext_neg_ctrl_20231007               0
## ext_neg_ctrl_20231008               0
## ext_neg_ctrl_2024220A               0
## ext_neg_ctrl_2024312A               0
## ext_neg_ctrl_2024312B               0
## ext_neg_ctrl_2024314A               0
## ext_neg_ctrl_2024314B               0
## pcr_rbcL_neg_ctrl_20240523          0
rm(sample_totals)

# Histogram of total reads per sample
hist(
  rbcL.IDs.long %>%
    group_by(Sample) %>%
    summarise(SumReads = sum(Reads)) %>%
    pull(SumReads),
  xlab = "SampleReads",
  main = NULL
) # 1 to ~30,000 reads per sample

# Numeric summary (min / quartiles / mean / max) of the same per-sample totals
rbcL.IDs.long %>%
  group_by(Sample) %>%
  summarise(SumReads = sum(Reads)) %>%
  select(SumReads) %>%
  summary()
##     SumReads    
##  Min.   :    2  
##  1st Qu.: 2475  
##  Median : 5975  
##  Mean   : 6734  
##  3rd Qu.: 9668  
##  Max.   :22161

Plots: reads per sample and total reads by sample vs control

# Negative controls are the samples whose name starts with "ext", "pcr" or
# "rbcL"; every other sample is treated as an unknown (real) sample.

# Reads per sample for __negative control samples__ (fill shows whether the
# sample is below 2K reads)
rbcL.IDs.long %>%
  filter(str_detect(Sample, "^(ext|pcr|rbcL)")) %>%
  group_by(Sample) %>%
  summarise(SumReads = sum(Reads)) %>%
  ggplot(aes(x = Sample, y = SumReads, fill = SumReads < 2000)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))

# Reads per sample for __unknown samples__ (fill shows whether the sample is
# above 2K reads)
rbcL.IDs.long %>%
  filter(!str_detect(Sample, "^(ext|pcr|rbcL)")) %>%
  group_by(Sample) %>%
  summarise(SumReads = sum(Reads)) %>%
  ggplot(aes(x = Sample, y = SumReads, fill = SumReads > 2000)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))

# Histogram of reads per sample for __unk samples__ (1000-read bins)
rbcL.IDs.long %>%
  filter(!str_detect(Sample, "^(ext|pcr|rbcL)")) %>%
  group_by(Sample) %>%
  summarise(SumReads = sum(Reads)) %>%
  ggplot(aes(SumReads)) +
  geom_histogram(binwidth = 1000, color = "black")

# Histogram of reads per sample for __neg ctrls__ (250-read bins)
rbcL.IDs.long %>%
  filter(str_detect(Sample, "^(ext|pcr|rbcL)")) %>%
  group_by(Sample) %>%
  summarise(SumReads = sum(Reads)) %>%
  ggplot(aes(SumReads)) +
  geom_histogram(binwidth = 250, color = "black")

How many ASVs total? How many unassigned?

952 total ASVs across all samples (260 ASVs after removing low-abundance ASVs; 251 ASVs after also removing contaminant sequences). 432 of all ASVs were not assigned to species (69 after removing low-abundance and contaminant sequences; the output below currently shows 72).

# Number of ASVs (rows of rbcL.IDs).
# History: 952 total ASVs across all samples before removing low-abund ASVs #(260 ASVs after removing low-abund ASVs) #((251 ASVs after also removing contaminant sequences))
nrow(rbcL.IDs)
## [1] 251
#(numbers in parentheses below reflect totals after low-abund ASV removal)
# ASVs not assigned to family; earlier runs: 149 (4)
summarise(rbcL.IDs, n = sum(is.na(Family)))
##   n
## 1 3
# ASVs not assigned to genus; earlier runs: 242 (31) ((30))
summarise(rbcL.IDs, n = sum(is.na(Genus)))
##    n
## 1 28
# ASVs not assigned to species; earlier runs: 432 (69)
summarise(rbcL.IDs, n = sum(is.na(Species)))
##    n
## 1 72

How many reads per ASV?

Most ASVs have very few total reads. There are a handful of common species assigned to most ASVs, but also a lot of reads whose ASV could not be assigned.

# Distribution of total reads per ASV: most ASVs have very few total reads
rbcL.IDs %>%
  ggplot(aes(ASVTotalReads)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

(most common species!) How many reads for each ASV species assignment?

There are a handful of common species assigned to most ASVs, but also a lot of reads whose ASV could not be assigned

# Total reads per species assignment (ASVs sharing a species are stacked in one
# bar; ASVs without a species fall under NA)
rbcL.IDs %>%
  ggplot(aes(x = Species, y = ASVTotalReads)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))

What percent of reads assigned to spp overall?

Overall, about 80% of reads were assigned to species

# Reads belonging to ASVs that were NOT assigned to species.
# Figures noted from earlier runs: 329,977 reads unassigned to species (out of 1,928,317 reads across all samples total reads)
rbcL.IDs %>% filter(is.na(Species)) %>% summarize(sum=sum(ASVTotalReads))
##      sum
## 1 295661
#after removing low-abund ASVs, there are 285,984 reads unassigned to species (out of 1,898,063 reads total)
#after also removing contaminating sequences, there are 265,340 reads unassigned to species (out of 1,562,182 reads total)

# Proportion of reads assigned to species. The denominator is the total read
# count taken from the data itself rather than a typed-in number, so it stays
# correct when upstream QC / filtering parameters change the totals.
(rbcL.IDs %>% filter(!is.na(Species)) %>% summarize(sum=sum(ASVTotalReads)))/sum(rbcL.IDs$ASVTotalReads) # ~81% of reads assigned to species; this value may change as the total project reads (denominator) or total assigned reads (numerator) changes with different upstream QC, filtering parameters
##         sum
## 1 0.8107384

What percent of reads assigned to spp for each sample?

Some samples have a LOT of unassigned reads. How many unknown (Unk) ASVs are there per sample?

# Proportion of each sample's reads that come from species-assigned (TRUE) vs
# unassigned (FALSE) ASVs
rbcL.IDs.long %>%
  ggplot(aes(x = Sample, y = Reads, fill = !is.na(Species))) +
  geom_col(position = "fill") +
  labs(x = "", y = "Proportion of Reads") +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))

#some samples with a LOT of unassigned reads

# Number of ASVs without a species assignment in each sample (only samples that
# have at least one such ASV appear)
rbcL.IDs.long %>%
  filter(is.na(Species)) %>%
  group_by(Sample) %>%
  dplyr::summarise(UnkRichness = n()) %>%
  ggplot(aes(x = Sample, y = UnkRichness)) +
  geom_col() +
  labs(x = "", y = "Unk ASV Richness")

# how many Unk ASVs per sample

# Per sample: count of unassigned ASVs, count of assigned ASVs, and the
# unassigned share of all ASVs in that sample
rbcL.IDs.long %>%
  group_by(Sample) %>%
  dplyr::summarise(
    UnkRichness = sum(is.na(Species)),
    KnownRichness = sum(!is.na(Species)),
    UnkProp = UnkRichness / (UnkRichness + KnownRichness)
  )
## # A tibble: 232 × 4
##    Sample       UnkRichness KnownRichness UnkProp
##    <chr>              <int>         <int>   <dbl>
##  1 2020_6_16_H1           0             5   0    
##  2 2020_6_16_H5           0             5   0    
##  3 2020_6_16_H6           0             2   0    
##  4 2020_6_17_H2           0             7   0    
##  5 2020_6_17_H4           0             5   0    
##  6 2020_6_17_H8           0             8   0    
##  7 2020_6_18_H3           0             5   0    
##  8 2020_6_18_H7           1             2   0.333
##  9 2020_6_18_H9           0             1   0    
## 10 2020_6_30_H1           0             7   0    
## # ℹ 222 more rows
# Bar chart of the share of each sample's ASVs that lack a species assignment
rbcL.IDs.long %>%
  group_by(Sample) %>%
  dplyr::summarise(
    UnkRichness = sum(is.na(Species)),
    KnownRichness = sum(!is.na(Species)),
    UnkProp = UnkRichness / (UnkRichness + KnownRichness)
  ) %>%
  ggplot(aes(x = Sample, y = UnkProp)) +
  geom_col() +
  labs(x = "", y = "Proportion of ASVs Unidentified to Species")

How many unassigned ASVs have more than 1000 reads?

# Histogram of total reads for ASVs unassigned to species that have more than
# 1000 reads (10,000-read bins)
rbcL.IDs %>%
  filter(is.na(Species) & ASVTotalReads > 1000) %>%
  select(ASVTotalReads) %>%
  ggplot(aes(ASVTotalReads)) +
  geom_histogram(binwidth = 10000, color = "black")

# Note from an earlier run: these 23 ASVs represent ~75% of the Species=NA reads
# (recall: 329,977 reads unassigned to species), i.e. 23 ASVs that comprise a
# total of 247,220 reads. The count and read total for the current data are
# printed by the length/sum calls that follow this chunk.
# First rows of the common (>1000 reads) ASVs that were not assigned to species
# (row names are the ASV sequences)
rbcL.IDs %>%
  filter(is.na(Species) & ASVTotalReads > 1000) %>%
  select(ASVTotalReads) %>%
  head()
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                   ASVTotalReads
## AACCTGGAGTTCCGCCCGAGGAAGCAGGGGCCGCGGTAGCTGCGGAATCTTCTACTGGTACATGGACAACTGTGTGGACCGACGGGCTTACCAGTCTTGATCGTTATAAGGGACGATGCTACGACATCGAGCCTGTTGCTGGAGAAGAAAATCAATATATTGCTTATGTAGCTTACCCCTTAGACCTTTTTGAGGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTGGGTAATGTATTTGGGTTCAAAGCCCTACGCGCTCTACGTCTGGAGGATTTGCGAATTCCTGTTGCTTATGTTAAAACTTTCCAAGGCCCACCTCATGGTATCCAAGTTGAGAGAGATAAATTGAATAAGTATGGTCGCCCCCTATTGGGCTGTACTATTAAACCTAAATTGGGGTTATCCGCTAAGAATTACGGTAGAGCAGTTTATGAATGTCTA           104923
## AACCTGGAGTTCCACCTGAGGAAGCAGGGGCCGCGGTAGCTGCTGAATCTTCTACTGGTACATGGACAACTGTATGGACTGACGGTCTTACCAGTCTTGATCGTTACAAAGGTCGATGCTACCACATCGAGCCTGTTGCTGGAGAAGAAAGTCAATTTATTGCTTATGTAGCTTACCCCTTAGACCTTTTTGAAGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTAGGTAATGTGTTTGGGTTCAAGGCCCTGCGCGCTCTACGTCTGGAGGATTTGCGAATCCCTGTTGCTTATGTTAAAACTTTCCAGGGCCCGCCTCATGGTATCCAAGTTGAGAGAGATAAATTGAACAAGTATGGCCGCCCTCTATTGGGATGTACTATAAAACCAAAATTGGGGTTATCCGCTAAGAATTACGGTAGAGCAGTTTATGAATGTCTA            34522
## AACCTGGAGTTCCGCCCGAGGAAGCAGGGGCCGCGGTAGCTGCTGAATCTTCCACTGGGACATGGACAACTGTGTGGACTGACGGGCTTACCAGTCTTGATCGTTACAAAGGACGATGCTACCACATCGAGCCGGTTGCTGGAGAAGAAAATCAATTTATTGCTTATGTAGCTTACCCCTTAGACCTCTTTGAAGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTGGGTAATGTATTTGGATTCAAGGCCCTGCGCGCTCTACGTCTGGAGGATTTGCGAATCCCTACTTCTTATTCTAAAACTTTCCAAGGTCCGCCTCATGGCATCCAAGTTGAGAGGGATAAATTAAACAAGTATGGCCGCCCCCTATTAGGATGTACTATTAAACCTAAATTGGGATTATCCGCTAAGAATTACGGTAGAGCAGTTTATGAATGTCTC            19868
## AACCTGGAGTTCCACCTGAAGAAGCAGGGGCTGCGGTAGCTGCCGAATCTTCTACTGGTACATGGACAACTGTGTGGACCGATGGACTTACCAGCCTTGATCGTTACAAAGGGCGGTGCTACCACATCGAACCCGTTGCTGGAGAAGAAAGTCAATTTATTGCTTATGTAGCTTACCCCTTAGACCTTTTTGAAGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTGGGAAATGTATTTGGATTCAAAGCCCTGCGTGCTCTACGTCTGGAAGATCTGCGAATCCCTACTGCTTATACTAAAACTTTCCAAGGCCCGCCTCATGGCATCCAAGTTGAGAGAGATAAATTGAACAAGTATGGTCGTCCCCTATTGGGATGTACTATTAAACCTAAATTGGGGTTATCTGCTAAAAACTATGGTAGAGCAGTTTATGAATGTCTC            18890
## CGCAACCTGGAGTTCCGGCTGAAGAAGCAGGTGCAGCGGTAGCTGCCGAATCTTCCACTGGGACATGGACAACTGTGTGGACCGATGGACTTACCAGTCTTGATCGTTATAAAGGACGCTGCTACCACATCGAACCTGTTGCTGGAGAAGAGACTCAATTTATTGCTTATGTAGCTTATCCCTTAGACCTTTTTGAAGAAGGTTCTGTTACTAACATGTTTACCTCCATTGTAGGTAATGTATTTGGGTTCAAGGCCTTGCGTGCTCTACGTCTGGAAGATTTGCGAATCCCCGTTGCTTATGTTAAAACTTTCCAAGGTCCTCCTCACGGAATCCAAGTTGAGAGAGATAAATTGAACAAATATGGACGTCCCCTATTGGGATGTACTATTAAACCTAAATTGGGTTTATCCGCTAAAAATTACGGTAGAGCAGTTTATGAATGTCTA         16182
## AACCTGGAGTTCCTCCTGAAGAAGCAGGGGCCGCAGTAGCTGCCGAATCTTCTACTGGTACATGGACAACTGTGTGGACCGATGGACTTACCAGCCTTGATCGTTACAAAGGGCGATGCTACCACATCGATGCCGTTCCGGGAGAAGAAAATCAATATATATGTTATGTAGCTTATCCTTTAGACCTTTTTGAAGAAGGTTCTGTTACTAACATGTTTACTTCCATCGTAGGTAATGTATTTGGGTTCAAAGCCCTGCGCGCTCTACGTCTGGAAGATCTGCGAATCCCTCCTGCTTATGTTAAAACTTTCCAAGGCCCACCTCATGGGATCCAAGTTGAAAGAGATAAATTGAACAAGTACGGCCGTCCTTTGTTGGGATGTACTATTAAACCTAAATTGGGCTTATCCGCTAAAAACTACGGTAGAGCAGTTTATGAATGTCTT            12438
# Number of abundant (> 1000 reads) ASVs with no species assignment
rbcL.IDs %>% filter(is.na(Species) & ASVTotalReads > 1000) %>% nrow()
## [1] 26
# Total reads carried by those ASVs
rbcL.IDs %>% filter(is.na(Species) & ASVTotalReads > 1000) %>% pull(ASVTotalReads) %>% sum()
## [1] 277642

What percent of ASV richness by sample is left unassigned?

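A mean of 23% of ASVs per sample are unassigned to species, but only a mean of 1.6% of ASVs per sample are unassigned to Family (see PercNoSpp and PercNoFam in the summary below; an earlier version of this note reported 30% and 5%).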

# Summary table "ASVs": for each sample, the total reads, the number of ASVs,
# the number of ASVs not assigned to species / family, and the number of
# distinct families and genera.
# All columns come from a single grouped summarise(), so every row is
# guaranteed to describe one sample; nothing depends on separately built tables
# sharing the same row order, and no one-letter temporaries (a, b, c, d, e) are
# left in the workspace.
ASVs <- rbcL.IDs.long %>%
  group_by(Sample) %>%
  summarise(
    SampleTotalReads = sum(Reads),
    CountASVs = n(),
    ASVs_NoSpp = sum(is.na(Species)), # 0 for samples where every ASV has a species
    ASVs_NoFam = sum(is.na(Family)),
    Families = n_distinct(Family), # NA counts as one distinct value
    Genera = n_distinct(Genus)
  ) %>%
  as.data.frame() %>% # plain data.frame
  mutate(
    PercNoSpp = (ASVs_NoSpp / CountASVs) * 100,
    PercNoFam = (ASVs_NoFam / CountASVs) * 100
  ) # number and percent of ASVs not assigned to species or family

summary(ASVs)
##     Sample          SampleTotalReads   CountASVs       ASVs_NoSpp   
##  Length:232         Min.   :    2    Min.   : 1.00   Min.   :0.000  
##  Class :character   1st Qu.: 2475    1st Qu.: 5.00   1st Qu.:1.000  
##  Mode  :character   Median : 5975    Median : 8.00   Median :2.000  
##                     Mean   : 6734    Mean   :10.27   Mean   :2.806  
##                     3rd Qu.: 9668    3rd Qu.:14.00   3rd Qu.:5.000  
##                     Max.   :22161    Max.   :30.00   Max.   :9.000  
##    ASVs_NoFam        Families          Genera         PercNoSpp    
##  Min.   :0.0000   Min.   : 1.000   Min.   : 1.000   Min.   : 0.00  
##  1st Qu.:0.0000   1st Qu.: 3.000   1st Qu.: 4.000   1st Qu.:11.11  
##  Median :0.0000   Median : 5.000   Median : 6.000   Median :25.00  
##  Mean   :0.2543   Mean   : 6.181   Mean   : 6.767   Mean   :23.19  
##  3rd Qu.:0.0000   3rd Qu.: 9.000   3rd Qu.: 9.000   3rd Qu.:33.33  
##  Max.   :3.0000   Max.   :18.000   Max.   :21.000   Max.   :66.67  
##    PercNoFam    
##  Min.   : 0.00  
##  1st Qu.: 0.00  
##  Median : 0.00  
##  Mean   : 1.60  
##  3rd Qu.: 0.00  
##  Max.   :16.67
  # Percent of each sample's ASVs that lack a species assignment
  ASVs %>%
    ggplot(aes(x = Sample, y = PercNoSpp)) +
    geom_col()

  # Percent of each sample's ASVs that lack a family assignment
  ASVs %>%
    ggplot(aes(x = Sample, y = PercNoFam)) +
    geom_col()

How many species per sample?

mean = 6.3 species (range = 1-20 species) per sample

# Species richness per sample: number of distinct species among the ASVs that
# received a species assignment
rbcL.IDs.long %>%
  filter(!is.na(Species)) %>%
  group_by(Sample) %>%
  dplyr::summarise(Richness = n_distinct(Species))
## # A tibble: 232 × 2
##    Sample       Richness
##    <chr>           <int>
##  1 2020_6_16_H1        4
##  2 2020_6_16_H5        5
##  3 2020_6_16_H6        2
##  4 2020_6_17_H2        5
##  5 2020_6_17_H4        5
##  6 2020_6_17_H8        5
##  7 2020_6_18_H3        3
##  8 2020_6_18_H7        2
##  9 2020_6_18_H9        1
## 10 2020_6_30_H1        5
## # ℹ 222 more rows
# Min / quartiles / mean / max of per-sample species richness
rbcL.IDs.long %>%
  filter(!is.na(Species)) %>%
  group_by(Sample) %>%
  dplyr::summarise(Richness = n_distinct(Species)) %>%
  summary()
##     Sample             Richness    
##  Length:232         Min.   : 1.00  
##  Class :character   1st Qu.: 3.00  
##  Mode  :character   Median : 5.00  
##                     Mean   : 6.25  
##                     3rd Qu.: 9.00  
##                     Max.   :20.00
# Bar chart of species richness for every sample
rbcL.IDs.long %>%
  filter(!is.na(Species)) %>%
  group_by(Sample) %>%
  dplyr::summarise(Richness = n_distinct(Species)) %>%
  ggplot(aes(x = Sample, y = Richness)) +
  geom_col() +
  labs(x = "", y = "Spp Richness") +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))

How many reads per species per sample? And proportion of reads per species per sample

# Stacked bars: number of reads per species in each sample (legend hidden here
# and drawn on its own page by the next statements)
rbcL.IDs.long %>%
  ggplot(aes(x = Sample, y = Reads, fill = Species)) +
  geom_col() +
  labs(x = "", y = "Num of Reads") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1),
    legend.position = "none"
  )

# The legend for the above plot: rebuild the same plot with its legend, extract
# the legend with cowplot and draw it on a fresh grid page
grid::grid.newpage()
grid::grid.draw(
  cowplot::get_legend(
    ggplot(rbcL.IDs.long, aes(x = Sample, y = Reads, fill = Species)) +
      geom_col()
  )
)
## Warning in get_plot_component(plot, "guide-box"): Multiple components found;
## returning the first one. To return all, use `return_all = TRUE`.

# Same stacked bars with position = "fill", so every sample's bar is rescaled
# to a total height of 1 (proportion of reads per species)
ggplot(data = rbcL.IDs.long, mapping = aes(x = Sample, y = Reads, fill = Species)) +
  geom_col(position = "fill") +
  labs(x = "", y = "Prop of Reads") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1),
    legend.position = "none"
  )

Rarefaction with phyloseq

# Build a phyloseq object from the sequence table and the rbcL taxonomy table.
# taxa_are_rows = FALSE: the sequence table has samples as rows and ASVs as columns.
# errorIfNULL = TRUE: stop instead of silently returning NULL on bad input.
OTU <- otu_table(seqtab.nochim.nocontam, taxa_are_rows = FALSE, errorIfNULL = TRUE)
TAX <- tax_table(taxa.rbcl)
physeq <- phyloseq(OTU, TAX)
physeq
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 251 taxa and 246 samples ]
## tax_table()   Taxonomy Table:    [ 251 taxa by 7 taxonomic ranks ]
# List the slots defined on the phyloseq object
methods::slotNames(physeq)
## [1] "otu_table" "tax_table" "sam_data"  "phy_tree"  "refseq"
# example of what you can do with phyloseq object, physeq:
# make plots:
# plot_bar(physeq, fill = "Species") # this is basically the same as the plot under 'how many spp per sample?'


# Coerce the otu_table to a plain matrix for vegan. as() is the S4 coercion and
# does so without a warning; as.matrix() will do nothing, and overwriting
# class(OTU) directly warns and leaves a non-S4 object.
OTU <- as(OTU, "matrix")
# Rarefaction curves, one line per sample. With tidy = TRUE rarecurve() returns
# a long data frame instead of drawing a base plot; its columns are
#   Site    - the sample (row of OTU),
#   Sample  - the subsample size (read depth),
#   Species - the number of taxa (here ASVs, the columns of OTU) detected.
vegan::rarecurve(
  OTU,
  step = 50,
  xlab = "Sample Size",
  ylab = "Species",
  label = TRUE,
  tidy = TRUE
) %>%
  ggplot(aes(x = Sample, y = Species, col = Site)) +
  geom_line() +
  labs(x = "Read Depth", y = "ASVs detected", col = "") +
  theme(legend.position = "none") +
  # NOTE: lims() drops points outside these ranges rather than zooming
  lims(x = c(0, 30000), y = c(0, 30))
## empty rows removed

Identify taxa missing from rbcL reference database

identify families that are not being identified to species

Histogram of read numbers for each ASV

Using a threshold of 1000 reads to designate ‘abundant’ vs ‘non-abundant’ ASVs (from “How many unassigned ASVs have more than 1000 reads?”). ASVs with more than this many total reads that were not assigned to species should be pulled out for further investigation.

# Histogram of the Reads column of rbcL.IDs.long (default 30 bins)
ggplot(data = rbcL.IDs.long, mapping = aes(x = Reads)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram restricted to rows with fewer than 1000 reads
rbcL.IDs.long %>%
  filter(Reads < 1000) %>%
  ggplot(mapping = aes(x = Reads)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Identify abundant unknown ASVs

Here we extract the sequences of abundant (more than 1000 reads) but unidentified (Family or Species is NA) ASVs

# Per sample, count the rows with >1000 reads and no species assignment,
# filled by the Family they were assigned to (NA if none)
rbcL.IDs.long %>%
  filter(Reads > 1000, is.na(Species)) %>%
  ggplot(mapping = aes(x = Sample, fill = Family)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1))

# rows with more than 1000 reads in a sample and no Family assignment
rbcL.IDs.long %>%
  filter(Reads > 1000, is.na(Family)) # 1 ASVs
## # A tibble: 1 × 11
## # Groups:   Sample [1]
##   Kingdom     Phylum Class Order Family Genus Species ASVTotalReads Sample Reads
##   <chr>       <chr>  <chr> <chr> <chr>  <chr> <chr>           <dbl> <chr>  <int>
## 1 k__Viridip… p__St… c__s… o__L… <NA>   <NA>  <NA>            10131 Bg007   9559
## # ℹ 1 more variable: SampleTotalReads <int>
# Dimensions (rows x columns) of the ASVs with >1000 total reads and no Family
rbcL.IDs %>%
  filter(ASVTotalReads > 1000, is.na(Family)) %>%
  dim() # 2 ASVs
## [1]   2 254
# Dimensions (rows x columns) of the ASVs with >1000 total reads and no Species
dim(rbcL.IDs %>% filter(ASVTotalReads > 1000 & is.na(Species))) # 26 ASVs
## [1]  26 254
# The row names of rbcL.IDs are the ASV sequences; show the first six
# abundant ASVs that lack a species assignment
rbcL.IDs %>%
  filter(ASVTotalReads > 1000, is.na(Species)) %>%
  rownames() %>%
  head()
## [1] "AACCTGGAGTTCCGCCCGAGGAAGCAGGGGCCGCGGTAGCTGCGGAATCTTCTACTGGTACATGGACAACTGTGTGGACCGACGGGCTTACCAGTCTTGATCGTTATAAGGGACGATGCTACGACATCGAGCCTGTTGCTGGAGAAGAAAATCAATATATTGCTTATGTAGCTTACCCCTTAGACCTTTTTGAGGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTGGGTAATGTATTTGGGTTCAAAGCCCTACGCGCTCTACGTCTGGAGGATTTGCGAATTCCTGTTGCTTATGTTAAAACTTTCCAAGGCCCACCTCATGGTATCCAAGTTGAGAGAGATAAATTGAATAAGTATGGTCGCCCCCTATTGGGCTGTACTATTAAACCTAAATTGGGGTTATCCGCTAAGAATTACGGTAGAGCAGTTTATGAATGTCTA"   
## [2] "AACCTGGAGTTCCACCTGAGGAAGCAGGGGCCGCGGTAGCTGCTGAATCTTCTACTGGTACATGGACAACTGTATGGACTGACGGTCTTACCAGTCTTGATCGTTACAAAGGTCGATGCTACCACATCGAGCCTGTTGCTGGAGAAGAAAGTCAATTTATTGCTTATGTAGCTTACCCCTTAGACCTTTTTGAAGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTAGGTAATGTGTTTGGGTTCAAGGCCCTGCGCGCTCTACGTCTGGAGGATTTGCGAATCCCTGTTGCTTATGTTAAAACTTTCCAGGGCCCGCCTCATGGTATCCAAGTTGAGAGAGATAAATTGAACAAGTATGGCCGCCCTCTATTGGGATGTACTATAAAACCAAAATTGGGGTTATCCGCTAAGAATTACGGTAGAGCAGTTTATGAATGTCTA"   
## [3] "AACCTGGAGTTCCGCCCGAGGAAGCAGGGGCCGCGGTAGCTGCTGAATCTTCCACTGGGACATGGACAACTGTGTGGACTGACGGGCTTACCAGTCTTGATCGTTACAAAGGACGATGCTACCACATCGAGCCGGTTGCTGGAGAAGAAAATCAATTTATTGCTTATGTAGCTTACCCCTTAGACCTCTTTGAAGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTGGGTAATGTATTTGGATTCAAGGCCCTGCGCGCTCTACGTCTGGAGGATTTGCGAATCCCTACTTCTTATTCTAAAACTTTCCAAGGTCCGCCTCATGGCATCCAAGTTGAGAGGGATAAATTAAACAAGTATGGCCGCCCCCTATTAGGATGTACTATTAAACCTAAATTGGGATTATCCGCTAAGAATTACGGTAGAGCAGTTTATGAATGTCTC"   
## [4] "AACCTGGAGTTCCACCTGAAGAAGCAGGGGCTGCGGTAGCTGCCGAATCTTCTACTGGTACATGGACAACTGTGTGGACCGATGGACTTACCAGCCTTGATCGTTACAAAGGGCGGTGCTACCACATCGAACCCGTTGCTGGAGAAGAAAGTCAATTTATTGCTTATGTAGCTTACCCCTTAGACCTTTTTGAAGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTGGGAAATGTATTTGGATTCAAAGCCCTGCGTGCTCTACGTCTGGAAGATCTGCGAATCCCTACTGCTTATACTAAAACTTTCCAAGGCCCGCCTCATGGCATCCAAGTTGAGAGAGATAAATTGAACAAGTATGGTCGTCCCCTATTGGGATGTACTATTAAACCTAAATTGGGGTTATCTGCTAAAAACTATGGTAGAGCAGTTTATGAATGTCTC"   
## [5] "CGCAACCTGGAGTTCCGGCTGAAGAAGCAGGTGCAGCGGTAGCTGCCGAATCTTCCACTGGGACATGGACAACTGTGTGGACCGATGGACTTACCAGTCTTGATCGTTATAAAGGACGCTGCTACCACATCGAACCTGTTGCTGGAGAAGAGACTCAATTTATTGCTTATGTAGCTTATCCCTTAGACCTTTTTGAAGAAGGTTCTGTTACTAACATGTTTACCTCCATTGTAGGTAATGTATTTGGGTTCAAGGCCTTGCGTGCTCTACGTCTGGAAGATTTGCGAATCCCCGTTGCTTATGTTAAAACTTTCCAAGGTCCTCCTCACGGAATCCAAGTTGAGAGAGATAAATTGAACAAATATGGACGTCCCCTATTGGGATGTACTATTAAACCTAAATTGGGTTTATCCGCTAAAAATTACGGTAGAGCAGTTTATGAATGTCTA"
## [6] "AACCTGGAGTTCCTCCTGAAGAAGCAGGGGCCGCAGTAGCTGCCGAATCTTCTACTGGTACATGGACAACTGTGTGGACCGATGGACTTACCAGCCTTGATCGTTACAAAGGGCGATGCTACCACATCGATGCCGTTCCGGGAGAAGAAAATCAATATATATGTTATGTAGCTTATCCTTTAGACCTTTTTGAAGAAGGTTCTGTTACTAACATGTTTACTTCCATCGTAGGTAATGTATTTGGGTTCAAAGCCCTGCGCGCTCTACGTCTGGAAGATCTGCGAATCCCTCCTGCTTATGTTAAAACTTTCCAAGGCCCACCTCATGGGATCCAAGTTGAAAGAGATAAATTGAACAAGTACGGCCGTCCTTTGTTGGGATGTACTATTAAACCTAAATTGGGCTTATCCGCTAAAAACTACGGTAGAGCAGTTTATGAATGTCTT"
# Sequence of the first abundant ASV without a species assignment
rbcL.IDs %>%
  filter(ASVTotalReads > 1000, is.na(Species)) %>%
  rownames() %>%
  .[1] # the first ASV
## [1] "AACCTGGAGTTCCGCCCGAGGAAGCAGGGGCCGCGGTAGCTGCGGAATCTTCTACTGGTACATGGACAACTGTGTGGACCGACGGGCTTACCAGTCTTGATCGTTATAAGGGACGATGCTACGACATCGAGCCTGTTGCTGGAGAAGAAAATCAATATATTGCTTATGTAGCTTACCCCTTAGACCTTTTTGAGGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTGGGTAATGTATTTGGGTTCAAAGCCCTACGCGCTCTACGTCTGGAGGATTTGCGAATTCCTGTTGCTTATGTTAAAACTTTCCAAGGCCCACCTCATGGTATCCAAGTTGAGAGAGATAAATTGAATAAGTATGGTCGCCCCCTATTGGGCTGTACTATTAAACCTAAATTGGGGTTATCCGCTAAGAATTACGGTAGAGCAGTTTATGAATGTCTA"
#"AACCTGGAGTTCCGCCCGAGGAAGCAGGGGCCGCGGTAGCTGCGGAATCTTCTACTGGTACATGGACAACTGTGTGGACCGACGGGCTTACCAGTCTTGATCGTTATAAGGGACGATGCTACGACATCGAGCCTGTTGCTGGAGAAGAAAATCAATATATTGCTTATGTAGCTTACCCCTTAGACCTTTTTGAGGAAGGTTCTGTTACTAACATGTTTACTTCCATTGTGGGTAATGTATTTGGGTTCAAAGCCCTACGCGCTCTACGTCTGGAGGATTTGCGAATTCCTGTTGCTTATGTTAAAACTTTCCAAGGCCCACCTCATGGTATCCAAGTTGAGAGAGATAAATTGAATAAGTATGGTCGCCCCCTATTGGGCTGTACTATTAAACCTAAATTGGGGTTATCCGCTAAGAATTACGGTAGAGCAGTTTATGAATGTCTA"

A BLAST search for the first ASV came back with 100% identity to several Salix species, including S. nigra

View all ASVs with >1000 reads unassigned to species

# Table of taxonomy ranks plus total reads for every ASV with >1000 total
# reads and no species assignment (sequence row names are not printed)
rbcL.IDs %>%
  select(Kingdom:ASVTotalReads) %>%
  filter(ASVTotalReads > 1000, is.na(Species)) %>%
  knitr::kable(row.names = FALSE)
Kingdom Phylum Class Order Family Genus Species ASVTotalReads
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__rosids_71275 o__Malpighiales_3646 f__Salicaceae_3688 g__Salix_40685 NA 104923
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__rosids_71275 o__Rosales_3744 f__Rosaceae_3745 NA NA 34522
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__rosids_71275 o__Fagales_3502 f__Fagaceae_3503 NA NA 19868
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__asterids_71274 o__Cornales_41934 f__Cornaceae_42219 g__Nyssa_4290 NA 18890
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__rosids_71275 o__Fabales_72025 f__Fabaceae_3803 g__Medicago_3877 NA 16182
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__asterids_71274 o__Boraginales_1538097 f__Boraginaceae_21571 NA NA 12438
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__asterids_71274 o__Lamiales_4143 NA NA NA 10131
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__asterids_71274 o__Asterales_4209 f__Asteraceae_4210 NA NA 10017
k__Viridiplantae_33090 p__Streptophyta_35493 c__undef__5 o__Ranunculales_41768 f__Papaveraceae_3465 g__Dicentra_22680 NA 7202
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__asterids_71274 o__Lamiales_4143 f__Lamiaceae_4136 NA NA 7021
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__asterids_71274 o__Dipsacales_4199 f__Adoxaceae_4206 g__Viburnum_4204 NA 5322
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__rosids_71275 o__Vitales_403667 f__Vitaceae_3602 NA NA 5111
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__asterids_71274 o__Ericales_41945 f__Ericaceae_4345 g__Vaccinium_13749 NA 3138
k__Viridiplantae_33090 p__Streptophyta_35493 c__undef__5 o__Ranunculales_41768 f__Papaveraceae_3465 NA NA 3068
k__Viridiplantae_33090 p__Streptophyta_35493 NA NA NA NA NA 2689
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__asterids_71274 o__Solanales_4069 f__Solanaceae_4070 g__Solanum_4107 NA 2603
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__rosids_71275 o__Brassicales_3699 f__Brassicaceae_3700 g__Hesperis_264417 NA 2467
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__rosids_71275 o__Malpighiales_3646 f__Salicaceae_3688 g__Salix_40685 NA 2095
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__Pinidae_3313 o__Pinales_1446380 f__Pinaceae_3318 g__Pinus_3337 NA 1515
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__rosids_71275 o__Rosales_3744 f__Rosaceae_3745 g__Spiraea_23224 NA 1376
k__Viridiplantae_33090 p__Streptophyta_35493 c__Liliopsida_4447 o__Asparagales_73496 f__Hyacinthaceae_44985 NA NA 1351
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__rosids_71275 o__Fabales_72025 f__Fabaceae_3803 g__Cercis_49800 NA 1195
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__asterids_71274 o__Lamiales_4143 f__Lamiaceae_4136 NA NA 1193
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__asterids_71274 o__Lamiales_4143 f__Lamiaceae_4136 g__Lamium_53158 NA 1167
k__Viridiplantae_33090 p__Streptophyta_35493 c__Liliopsida_4447 o__Asparagales_73496 f__Hyacinthaceae_44985 g__Scilla_4701 NA 1148
k__Viridiplantae_33090 p__Streptophyta_35493 c__sub__asterids_71274 o__Apiales_4036 f__Apiaceae_4037 NA NA 1010

Export sequence assignments and accessories for all samples (double check path!)

# Output directory for every exported table -- double check before running!
# On Rivanna (UVA HPC):
write_dir <- "/scratch/kls7sg/Bioinformatics/RMarkdown/rbcL_bioinformatics_files"
# On the local machine, use this instead:
# write_dir <- "/Users/kelseyschoenemann/Desktop/Bioinformatics/RMarkdown/rbcL_bioinformatics_files"

# Fail with a clear error before writing anything if the directory is missing
stopifnot(dir.exists(write_dir))

# save all ASVs with >1000 reads unassigned to species
# (the rownames(...) expression is left as-is: as.data.frame() derives the
# CSV column header from it, so rewriting it would rename the column)
save <- as.data.frame(rownames(rbcL.IDs %>% filter(ASVTotalReads>1000 & is.na(Species))))
write.csv(save, file = file.path(write_dir, "UnkSpp_rbcL_ASVs.csv"))

# seqtab.nochim.nocontam for phyloseq obj creation
write.csv(seqtab.nochim.nocontam, file = file.path(write_dir, "seqtab.nochim.nocontam.csv"))

# taxa.rbcl for phyloseq obj creation
write.csv(taxa.rbcl, file = file.path(write_dir, "taxa.rbcl.csv"))

# rbcL.IDs
write.csv(rbcL.IDs, file = file.path(write_dir, "rbcL.IDs.csv"))

# rbcL.IDs.long
write.csv(rbcL.IDs.long, file = file.path(write_dir, "rbcL.IDs.long.csv"))